xref: /petsc/src/dm/impls/da/fdda.c (revision b122ec5aa1bd4469eb4e0673542fb7de3f411254)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 #include <petscbt.h>
5 
6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
10 
/*
   For a ghost index i that may be negative or greater than the upper bound this
  maps it into the 0:m-1 range using periodicity.

   Note: every macro parameter and the whole expansion are parenthesized so the
   macro stays correct when passed compound expressions (e.g. SetInRange(a+b,m));
   callers should still avoid arguments with side effects since each argument
   may be evaluated more than once.
*/
#define SetInRange(i,m) (((i) < 0) ? (m)+(i) : (((i) >= (m)) ? (i)-(m) : (i)))
16 
17 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
18 {
19   PetscInt       i,j,nz,*fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(0);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i=0; i<w; i++) {
27     for (j=0; j<w; j++) {
28       if (dfill[w*i+j]) nz++;
29     }
30   }
31   CHKERRQ(PetscMalloc1(nz + w + 1,&fill));
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i=0; i<w; i++) {
37     fill[i] = nz;
38     for (j=0; j<w; j++) {
39       if (dfill[w*i+j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(0);
49 }
50 
51 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill)
52 {
53   PetscInt       nz;
54 
55   PetscFunctionBegin;
56   if (!dfillsparse) PetscFunctionReturn(0);
57 
58   /* Determine number of non-zeros */
59   nz = (dfillsparse[w] - w - 1);
60 
61   /* Allocate space for our copy of the given sparse matrix representation. */
62   CHKERRQ(PetscMalloc1(nz + w + 1,rfill));
63   CHKERRQ(PetscArraycpy(*rfill,dfillsparse,nz+w+1));
64   PetscFunctionReturn(0);
65 }
66 
/* Build dd->ofillcols from dd->ofill: for every component column that appears
   anywhere in the off-diagonal fill pattern, store a 1-based running index of
   that column among the nonzero columns; columns with no coupling stay 0.
   Assumes dd->ofill is already in the modified CSR form (see
   DMDASetBlockFills_Private()). */
static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
{
  PetscInt       i,k,cnt = 1;

  PetscFunctionBegin;

  /* ofillcols tracks the columns of ofill that have any nonzero in them; the value in each location is the number of
   columns to the left with any nonzeros in them plus 1 */
  CHKERRQ(PetscCalloc1(dd->w,&dd->ofillcols));
  /* mark each column index that occurs in any row of the ofill CSR structure */
  for (i=0; i<dd->w; i++) {
    for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
  }
  /* replace each mark with its 1-based position among the marked columns */
  for (i=0; i<dd->w; i++) {
    if (dd->ofillcols[i]) {
      dd->ofillcols[i] = cnt++;
    }
  }
  PetscFunctionReturn(0);
}
86 
/*@
    DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
    of the matrix returned by DMCreateMatrix().

    Logically Collective on da

    Input Parameters:
+   da - the distributed array
.   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
-   ofill - the fill pattern in the off-diagonal blocks

    Level: developer

    Notes:
    This only makes sense when you are doing multicomponent problems but using the
       MPIAIJ matrix format

           The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
       representing coupling and 0 entries for missing coupling. For example
$             dfill[9] = {1, 0, 0,
$                         1, 1, 0,
$                         0, 1, 1}
       means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
       itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
       diagonal block).

     DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
     can be represented in the dfill, ofill format

   Contributed by Glenn Hammond

.seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()

@*/
PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
{
  DM_DA          *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information (converted to modified CSR form) */
  CHKERRQ(DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill));
  CHKERRQ(DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill));

  /* count nonzeros in ofill columns */
  CHKERRQ(DMDASetBlockFills_Private2(dd));

  PetscFunctionReturn(0);
}
135 
/*@
    DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
    of the matrix returned by DMCreateMatrix(), using sparse representations
    of fill patterns.

    Logically Collective on da

    Input Parameters:
+   da - the distributed array
.   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
-   ofill - the sparse fill pattern in the off-diagonal blocks

    Level: developer

    Notes: This only makes sense when you are doing multicomponent problems but using the
       MPIAIJ matrix format

           The format for dfill and ofill is a sparse representation of a
           dof-by-dof matrix with 1 entries representing coupling and 0 entries
           for missing coupling.  The sparse representation is a 1 dimensional
           array of length nz + dof + 1, where nz is the number of non-zeros in
           the matrix.  The first dof entries in the array give the
           starting array indices of each row's items in the rest of the array,
           the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
           and the remaining nz items give the column indices of each of
           the 1s within the logical 2D matrix.  Each row's items within
           the array are the column indices of the 1s within that row
           of the 2D matrix.  PETSc developers may recognize that this is the
           same format as that computed by the DMDASetBlockFills_Private()
           function from a dense 2D matrix representation.

     DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
     can be represented in the dfill, ofill format

   Contributed by Philip C. Roth

.seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()

@*/
PetscErrorCode  DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
{
  DM_DA          *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information (copied verbatim, already sparse) */
  CHKERRQ(DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill));
  CHKERRQ(DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill));

  /* count nonzeros in ofill columns */
  CHKERRQ(DMDASetBlockFills_Private2(dd));

  PetscFunctionReturn(0);
}
189 
/* Dispatch routine that creates an ISColoring for a DMDA, selecting the
   1d/2d/3d implementation by dimension.  For BAIJ matrix types the dd fields
   are temporarily rescaled so the coloring is computed per block rather than
   per matrix entry; the rescaling is undone after the dispatch below. */
PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt         dim,m,n,p,nc;
  DMBoundaryType   bx,by,bz;
  MPI_Comm         comm;
  PetscMPIInt      size;
  PetscBool        isBAIJ;
  DM_DA            *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
                                  m
          ------------------------------------------------------
         |                                                     |
         |                                                     |
         |               ----------------------                |
         |               |                    |                |
      n  |           yn  |                    |                |
         |               |                    |                |
         |               .---------------------                |
         |             (xs,ys)     xn                          |
         |            .                                        |
         |         (gxs,gys)                                   |
         |                                                     |
          -----------------------------------------------------
  */

  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  CHKERRQ(DMDAGetInfo(da,&dim,NULL,NULL,NULL,&m,&n,&p,&nc,NULL,&bx,&by,&bz,NULL));

  CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm));
  CHKERRMPI(MPI_Comm_size(comm,&size));
  if (ctype == IS_COLORING_LOCAL) {
    if (size == 1) {
      /* on a single process a local (ghosted) coloring is the same as a global one */
      ctype = IS_COLORING_GLOBAL;
    } else if (dim > 1) {
      /* a periodic direction only one process wide would wrap onto itself */
      if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
        SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
      }
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements  */
  CHKERRQ(PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ));
  if (!isBAIJ) CHKERRQ(PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ));
  if (!isBAIJ) CHKERRQ(PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ));
  if (isBAIJ) {
    /* divide the ownership ranges by nc; restored (multiplied back) after the dispatch */
    dd->w  = 1;
    dd->xs = dd->xs/nc;
    dd->xe = dd->xe/nc;
    dd->Xs = dd->Xs/nc;
    dd->Xe = dd->Xe/nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
   the basic DMDA does not know about matrices. We think of DMDA as being
   more low-level than matrices.
  */
  if (dim == 1) {
    CHKERRQ(DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring));
  } else if (dim == 2) {
    CHKERRQ(DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring));
  } else if (dim == 3) {
    CHKERRQ(DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring));
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
  if (isBAIJ) {
    /* undo the per-block rescaling applied above */
    dd->w  = nc;
    dd->xs = dd->xs*nc;
    dd->xe = dd->xe*nc;
    dd->Xs = dd->Xs*nc;
    dd->Xe = dd->Xe*nc;
  }
  PetscFunctionReturn(0);
}
270 
271 /* ---------------------------------------------------------------------------------*/
272 
273 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
274 {
275   PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
276   PetscInt        ncolors;
277   MPI_Comm        comm;
278   DMBoundaryType  bx,by;
279   DMDAStencilType st;
280   ISColoringValue *colors;
281   DM_DA           *dd = (DM_DA*)da->data;
282 
283   PetscFunctionBegin;
284   /*
285          nc - number of components per grid point
286          col - number of colors needed in one direction for single component problem
287 
288   */
289   CHKERRQ(DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st));
290   col  = 2*s + 1;
291   CHKERRQ(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
292   CHKERRQ(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
293   CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm));
294 
295   /* special case as taught to us by Paul Hovland */
296   if (st == DMDA_STENCIL_STAR && s == 1) {
297     CHKERRQ(DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring));
298   } else {
299     if (ctype == IS_COLORING_GLOBAL) {
300       if (!dd->localcoloring) {
301         CHKERRQ(PetscMalloc1(nc*nx*ny,&colors));
302         ii   = 0;
303         for (j=ys; j<ys+ny; j++) {
304           for (i=xs; i<xs+nx; i++) {
305             for (k=0; k<nc; k++) {
306               colors[ii++] = k + nc*((i % col) + col*(j % col));
307             }
308           }
309         }
310         ncolors = nc + nc*(col-1 + col*(col-1));
311         CHKERRQ(ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring));
312       }
313       *coloring = dd->localcoloring;
314     } else if (ctype == IS_COLORING_LOCAL) {
315       if (!dd->ghostedcoloring) {
316         CHKERRQ(PetscMalloc1(nc*gnx*gny,&colors));
317         ii   = 0;
318         for (j=gys; j<gys+gny; j++) {
319           for (i=gxs; i<gxs+gnx; i++) {
320             for (k=0; k<nc; k++) {
321               /* the complicated stuff is to handle periodic boundaries */
322               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
323             }
324           }
325         }
326         ncolors = nc + nc*(col - 1 + col*(col-1));
327         CHKERRQ(ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
328         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
329 
330         CHKERRQ(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
331       }
332       *coloring = dd->ghostedcoloring;
333     } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
334   }
335   CHKERRQ(ISColoringReference(*coloring));
336   PetscFunctionReturn(0);
337 }
338 
339 /* ---------------------------------------------------------------------------------*/
340 
/* Create (and cache on the DMDA) a coloring for a 3d structured grid; the 3d
   analogue of DMCreateColoring_DA_2d_MPIAIJ() using col = 2*s+1 colors per
   direction. */
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt        xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx,by,bz;
  DMDAStencilType st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  CHKERRQ(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st));
  col  = 2*s + 1;
  CHKERRQ(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  CHKERRQ(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm));

  /* create the coloring; the result is cached on the DMDA and reused on later calls */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      CHKERRQ(PetscMalloc1(nc*nx*ny*nz,&colors));
      ii   = 0;
      for (k=zs; k<zs+nz; k++) {
        for (j=ys; j<ys+ny; j++) {
          for (i=xs; i<xs+nx; i++) {
            for (l=0; l<nc; l++) {
              /* tensor-product color: component + nc*(i,j,k position within a col^3 tile) */
              colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
            }
          }
        }
      }
      ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
      CHKERRQ(ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      CHKERRQ(PetscMalloc1(nc*gnx*gny*gnz,&colors));
      ii   = 0;
      for (k=gzs; k<gzs+gnz; k++) {
        for (j=gys; j<gys+gny; j++) {
          for (i=gxs; i<gxs+gnx; i++) {
            for (l=0; l<nc; l++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
            }
          }
        }
      }
      ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
      CHKERRQ(ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
      CHKERRQ(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  CHKERRQ(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}
404 
405 /* ---------------------------------------------------------------------------------*/
406 
/* Create (and cache on the DMDA) a coloring for a 1d structured grid.  When an
   off-diagonal fill pattern has been set (dd->ofillcols), a reduced coloring is
   built that only distinguishes components with off-block coupling. */
PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt        xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  CHKERRQ(DMDAGetInfo(da,&dim,&m,NULL,NULL,&M,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL));
  col  = 2*s + 1;
  CHKERRQ(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL));
  CHKERRQ(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL));
  CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm));

  /* create the coloring; the result is cached on the DMDA and reused on later calls */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      CHKERRQ(PetscMalloc1(nc*nx,&colors));
      if (dd->ofillcols) {
        /* tc = number of components that have any off-diagonal-block coupling */
        PetscInt tc = 0;
        for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
        i1 = 0;
        for (i=xs; i<xs+nx; i++) {
          for (l=0; l<nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              /* component couples off-block: give it a distinct color per position
                 in the stencil tile, offset past the nc base colors.
                 NOTE(review): ofillcols[l] is the 1-based rank of column l among the
                 coupled columns (see DMDASetBlockFills_Private2()) */
              colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
            } else {
              /* component only couples within its own block: color by component alone */
              colors[i1++] = l;
            }
          }
        }
        ncolors = nc + 2*s*tc;
      } else {
        i1 = 0;
        for (i=xs; i<xs+nx; i++) {
          for (l=0; l<nc; l++) {
            colors[i1++] = l + nc*(i % col);
          }
        }
        ncolors = nc + nc*(col-1);
      }
      CHKERRQ(ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      CHKERRQ(PetscMalloc1(nc*gnx,&colors));
      i1   = 0;
      for (i=gxs; i<gxs+gnx; i++) {
        for (l=0; l<nc; l++) {
          /* the complicated stuff is to handle periodic boundaries */
          colors[i1++] = l + nc*(SetInRange(i,m) % col);
        }
      }
      ncolors = nc + nc*(col-1);
      CHKERRQ(ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
      CHKERRQ(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  CHKERRQ(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}
477 
478 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
479 {
480   PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
481   PetscInt        ncolors;
482   MPI_Comm        comm;
483   DMBoundaryType  bx,by;
484   ISColoringValue *colors;
485   DM_DA           *dd = (DM_DA*)da->data;
486 
487   PetscFunctionBegin;
488   /*
489          nc - number of components per grid point
490          col - number of colors needed in one direction for single component problem
491 
492   */
493   CHKERRQ(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,NULL));
494   CHKERRQ(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
495   CHKERRQ(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
496   CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm));
497   /* create the coloring */
498   if (ctype == IS_COLORING_GLOBAL) {
499     if (!dd->localcoloring) {
500       CHKERRQ(PetscMalloc1(nc*nx*ny,&colors));
501       ii   = 0;
502       for (j=ys; j<ys+ny; j++) {
503         for (i=xs; i<xs+nx; i++) {
504           for (k=0; k<nc; k++) {
505             colors[ii++] = k + nc*((3*j+i) % 5);
506           }
507         }
508       }
509       ncolors = 5*nc;
510       CHKERRQ(ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring));
511     }
512     *coloring = dd->localcoloring;
513   } else if (ctype == IS_COLORING_LOCAL) {
514     if (!dd->ghostedcoloring) {
515       CHKERRQ(PetscMalloc1(nc*gnx*gny,&colors));
516       ii = 0;
517       for (j=gys; j<gys+gny; j++) {
518         for (i=gxs; i<gxs+gnx; i++) {
519           for (k=0; k<nc; k++) {
520             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
521           }
522         }
523       }
524       ncolors = 5*nc;
525       CHKERRQ(ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
526       CHKERRQ(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
527     }
528     *coloring = dd->ghostedcoloring;
529   } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
530   PetscFunctionReturn(0);
531 }
532 
533 /* =========================================================================== */
534 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
535 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
536 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM,Mat);
537 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
538 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
539 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
540 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
541 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
542 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
543 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
544 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
545 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
546 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
547 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);
548 
/*@C
   MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix

   Logically Collective on mat

   Input Parameters:
+  mat - the matrix
-  da - the da

   Level: intermediate

@*/
PetscErrorCode MatSetupDM(Mat mat,DM da)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscValidHeaderSpecificType(da,DM_CLASSID,2,DMDA);
  /* forwarded to the matrix implementation if it provides MatSetupDM_C; a no-op otherwise */
  CHKERRQ(PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da)));
  PetscFunctionReturn(0);
}
569 
/* View a DMDA-generated parallel matrix in the natural (application) ordering:
   builds a permuted copy via the DMDA's AO and views that instead of the
   PETSc-ordered original. */
PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
{
  DM                da;
  const char        *prefix;
  Mat               Anatural;
  AO                ao;
  PetscInt          rstart,rend,*petsc,i;
  IS                is;
  MPI_Comm          comm;
  PetscViewerFormat format;

  PetscFunctionBegin;
  /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
  CHKERRQ(PetscViewerGetFormat(viewer,&format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);

  CHKERRQ(PetscObjectGetComm((PetscObject)A,&comm));
  CHKERRQ(MatGetDM(A, &da));
  PetscCheck(da,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");

  /* map this process's row range through the AO to get the natural-ordering indices */
  CHKERRQ(DMDAGetAO(da,&ao));
  CHKERRQ(MatGetOwnershipRange(A,&rstart,&rend));
  CHKERRQ(PetscMalloc1(rend-rstart,&petsc));
  for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
  CHKERRQ(AOApplicationToPetsc(ao,rend-rstart,petsc));
  /* IS takes ownership of petsc; do not free it here */
  CHKERRQ(ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is));

  /* call viewer on natural ordering */
  CHKERRQ(MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural));
  CHKERRQ(ISDestroy(&is));
  CHKERRQ(PetscObjectGetOptionsPrefix((PetscObject)A,&prefix));
  CHKERRQ(PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix));
  CHKERRQ(PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name));
  /* temporarily suppress the class/name/prefix header so the output looks like A itself */
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
  CHKERRQ(MatView(Anatural,viewer));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
  CHKERRQ(MatDestroy(&Anatural));
  PetscFunctionReturn(0);
}
609 
/* Load a DMDA-generated parallel matrix: the file stores the natural ordering,
   so load into a temporary matrix, permute it into the PETSc (application)
   ordering via the DMDA's AO, and replace A's contents in place. */
PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
{
  DM             da;
  Mat            Anatural,Aapp;
  AO             ao;
  PetscInt       rstart,rend,*app,i,m,n,M,N;
  IS             is;
  MPI_Comm       comm;

  PetscFunctionBegin;
  CHKERRQ(PetscObjectGetComm((PetscObject)A,&comm));
  CHKERRQ(MatGetDM(A, &da));
  PetscCheck(da,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");

  /* Load the matrix in natural ordering */
  CHKERRQ(MatCreate(PetscObjectComm((PetscObject)A),&Anatural));
  CHKERRQ(MatSetType(Anatural,((PetscObject)A)->type_name));
  CHKERRQ(MatGetSize(A,&M,&N));
  CHKERRQ(MatGetLocalSize(A,&m,&n));
  CHKERRQ(MatSetSizes(Anatural,m,n,M,N));
  CHKERRQ(MatLoad(Anatural,viewer));

  /* Map natural ordering to application ordering and create IS */
  CHKERRQ(DMDAGetAO(da,&ao));
  CHKERRQ(MatGetOwnershipRange(Anatural,&rstart,&rend));
  CHKERRQ(PetscMalloc1(rend-rstart,&app));
  for (i=rstart; i<rend; i++) app[i-rstart] = i;
  CHKERRQ(AOPetscToApplication(ao,rend-rstart,app));
  /* IS takes ownership of app; do not free it here */
  CHKERRQ(ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is));

  /* Do permutation and replace header: A keeps its identity but takes Aapp's data */
  CHKERRQ(MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp));
  CHKERRQ(MatHeaderReplace(A,&Aapp));
  CHKERRQ(ISDestroy(&is));
  CHKERRQ(MatDestroy(&Anatural));
  PetscFunctionReturn(0);
}
647 
/* Create a matrix suitably preallocated for this DMDA.  The matrix type comes
   from da->mattype; the preallocation routine is chosen by querying which
   *SetPreallocation_C methods the created matrix actually provides (AIJ, then
   BAIJ, SBAIJ, SELL, IS), falling back to a plain MatSetUp() otherwise. */
PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
{
  PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
  Mat            A;
  MPI_Comm       comm;
  MatType        Atype;
  void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
  MatType        mtype;
  PetscMPIInt    size;
  DM_DA          *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  CHKERRQ(MatInitializePackage());
  mtype = da->mattype;

  /*
                                  m
          ------------------------------------------------------
         |                                                     |
         |                                                     |
         |               ----------------------                |
         |               |                    |                |
      n  |           ny  |                    |                |
         |               |                    |                |
         |               .---------------------                |
         |             (xs,ys)     nx                          |
         |            .                                        |
         |         (gxs,gys)                                   |
         |                                                     |
          -----------------------------------------------------
  */

  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  M   = dd->M;
  N   = dd->N;
  P   = dd->P;
  dim = da->dim;
  dof = dd->w;
  /* CHKERRQ(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
  CHKERRQ(DMDAGetCorners(da,NULL,NULL,NULL,&nx,&ny,&nz));
  CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm));
  CHKERRQ(MatCreate(comm,&A));
  CHKERRQ(MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P));
  CHKERRQ(MatSetType(A,mtype));
  CHKERRQ(MatSetFromOptions(A));
  /* keep small matrices on the CPU when below the binding threshold */
  if (dof*nx*ny*nz < da->bind_below) {
    CHKERRQ(MatSetBindingPropagates(A,PETSC_TRUE));
    CHKERRQ(MatBindToCPU(A,PETSC_TRUE));
  }
  CHKERRQ(MatSetDM(A,da));
  if (da->structure_only) {
    CHKERRQ(MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE));
  }
  CHKERRQ(MatGetType(A,&Atype));
  /*
     We do not provide a getmatrix function in the DMDA operations because
   the basic DMDA does not know about matrices. We think of DMDA as being
   more low-level than matrices. This is kind of cheating but sometimes
   we think of DMDA as higher level than matrices.

     We could switch based on Atype (or mtype), but we do not since the
   specialized setting routines depend only on the particular preallocation
   details of the matrix, not the type itself.
  */
  CHKERRQ(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij));
  if (!aij) {
    CHKERRQ(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij));
  }
  /* cascade: only query the next family if the previous one is absent */
  if (!aij) {
    CHKERRQ(PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij));
    if (!baij) {
      CHKERRQ(PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij));
    }
    if (!baij) {
      CHKERRQ(PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij));
      if (!sbaij) {
        CHKERRQ(PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij));
      }
      if (!sbaij) {
        CHKERRQ(PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell));
        if (!sell) {
          CHKERRQ(PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell));
        }
      }
      if (!sell) {
        CHKERRQ(PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is));
      }
    }
  }
  if (aij) {
    if (dim == 1) {
      if (dd->ofill) {
        CHKERRQ(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A));
      } else {
        DMBoundaryType bx;
        PetscMPIInt  size;
        CHKERRQ(DMDAGetInfo(da,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&bx,NULL,NULL,NULL));
        CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da),&size));
        if (size == 1 && bx == DM_BOUNDARY_NONE) {
          CHKERRQ(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da,A));
        } else {
          CHKERRQ(DMCreateMatrix_DA_1d_MPIAIJ(da,A));
        }
      }
    } else if (dim == 2) {
      if (dd->ofill) {
        CHKERRQ(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A));
      } else {
        CHKERRQ(DMCreateMatrix_DA_2d_MPIAIJ(da,A));
      }
    } else if (dim == 3) {
      if (dd->ofill) {
        CHKERRQ(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A));
      } else {
        CHKERRQ(DMCreateMatrix_DA_3d_MPIAIJ(da,A));
      }
    }
  } else if (baij) {
    if (dim == 2) {
      CHKERRQ(DMCreateMatrix_DA_2d_MPIBAIJ(da,A));
    } else if (dim == 3) {
      CHKERRQ(DMCreateMatrix_DA_3d_MPIBAIJ(da,A));
    } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
  } else if (sbaij) {
    if (dim == 2) {
      CHKERRQ(DMCreateMatrix_DA_2d_MPISBAIJ(da,A));
    } else if (dim == 3) {
      CHKERRQ(DMCreateMatrix_DA_3d_MPISBAIJ(da,A));
    } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
  } else if (sell) {
     if (dim == 2) {
       CHKERRQ(DMCreateMatrix_DA_2d_MPISELL(da,A));
     } else if (dim == 3) {
       CHKERRQ(DMCreateMatrix_DA_3d_MPISELL(da,A));
     } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
  } else if (is) {
    CHKERRQ(DMCreateMatrix_DA_IS(da,A));
  } else {
    /* no recognized preallocation routine: just set up with local-to-global mapping */
    ISLocalToGlobalMapping ltog;

    CHKERRQ(MatSetBlockSize(A,dof));
    CHKERRQ(MatSetUp(A));
    CHKERRQ(DMGetLocalToGlobalMapping(da,&ltog));
    CHKERRQ(MatSetLocalToGlobalMapping(A,ltog,ltog));
  }
  CHKERRQ(DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]));
  CHKERRQ(MatSetStencil(A,dim,dims,starts,dof));
  CHKERRQ(MatSetDM(A,da));
  CHKERRMPI(MPI_Comm_size(comm,&size));
  if (size > 1) {
    /* change viewer to display matrix in natural ordering */
    CHKERRQ(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
    CHKERRQ(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
  }
  *J = A;
  PetscFunctionReturn(0);
}
809 
810 /* ---------------------------------------------------------------------------------*/
811 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
812 
813 PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
814 {
815   DM_DA                  *da = (DM_DA*)dm->data;
816   Mat                    lJ,P;
817   ISLocalToGlobalMapping ltog;
818   IS                     is;
819   PetscBT                bt;
820   const PetscInt         *e_loc,*idx;
821   PetscInt               i,nel,nen,nv,dof,*gidx,n,N;
822 
823   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
824      We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
825   PetscFunctionBegin;
826   dof  = da->w;
827   CHKERRQ(MatSetBlockSize(J,dof));
828   CHKERRQ(DMGetLocalToGlobalMapping(dm,&ltog));
829 
830   /* flag local elements indices in local DMDA numbering */
831   CHKERRQ(ISLocalToGlobalMappingGetSize(ltog,&nv));
832   CHKERRQ(PetscBTCreate(nv/dof,&bt));
833   CHKERRQ(DMDAGetElements(dm,&nel,&nen,&e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
834   for (i=0;i<nel*nen;i++) CHKERRQ(PetscBTSet(bt,e_loc[i]));
835 
836   /* filter out (set to -1) the global indices not used by the local elements */
837   CHKERRQ(PetscMalloc1(nv/dof,&gidx));
838   CHKERRQ(ISLocalToGlobalMappingGetBlockIndices(ltog,&idx));
839   CHKERRQ(PetscArraycpy(gidx,idx,nv/dof));
840   CHKERRQ(ISLocalToGlobalMappingRestoreBlockIndices(ltog,&idx));
841   for (i=0;i<nv/dof;i++) if (!PetscBTLookup(bt,i)) gidx[i] = -1;
842   CHKERRQ(PetscBTDestroy(&bt));
843   CHKERRQ(ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nv/dof,gidx,PETSC_OWN_POINTER,&is));
844   CHKERRQ(ISLocalToGlobalMappingCreateIS(is,&ltog));
845   CHKERRQ(MatSetLocalToGlobalMapping(J,ltog,ltog));
846   CHKERRQ(ISLocalToGlobalMappingDestroy(&ltog));
847   CHKERRQ(ISDestroy(&is));
848 
849   /* Preallocation */
850   if (dm->prealloc_skip) {
851     CHKERRQ(MatSetUp(J));
852   } else {
853     CHKERRQ(MatISGetLocalMat(J,&lJ));
854     CHKERRQ(MatGetLocalToGlobalMapping(lJ,&ltog,NULL));
855     CHKERRQ(MatCreate(PetscObjectComm((PetscObject)lJ),&P));
856     CHKERRQ(MatSetType(P,MATPREALLOCATOR));
857     CHKERRQ(MatSetLocalToGlobalMapping(P,ltog,ltog));
858     CHKERRQ(MatGetSize(lJ,&N,NULL));
859     CHKERRQ(MatGetLocalSize(lJ,&n,NULL));
860     CHKERRQ(MatSetSizes(P,n,n,N,N));
861     CHKERRQ(MatSetBlockSize(P,dof));
862     CHKERRQ(MatSetUp(P));
863     for (i=0;i<nel;i++) {
864       CHKERRQ(MatSetValuesBlockedLocal(P,nen,e_loc+i*nen,nen,e_loc+i*nen,NULL,INSERT_VALUES));
865     }
866     CHKERRQ(MatPreallocatorPreallocate(P,(PetscBool)!da->prealloc_only,lJ));
867     CHKERRQ(MatISRestoreLocalMat(J,&lJ));
868     CHKERRQ(DMDARestoreElements(dm,&nel,&nen,&e_loc));
869     CHKERRQ(MatDestroy(&P));
870 
871     CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
872     CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
873   }
874   PetscFunctionReturn(0);
875 }
876 
/*
  DMCreateMatrix_DA_2d_MPISELL - Preallocates a SELL-format matrix J for a 2d
  DMDA and, unless da->prealloc_only is set, inserts explicit zeros in every
  stencil location so the nonzero pattern is fixed.

  Handles both box and star stencils and periodic/non-periodic boundaries in
  each direction; the stencil at each grid point is clipped at non-periodic
  domain edges.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
  PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  CHKERRQ(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st));
  /* stencil diameter in one direction for stencil width s */
  col  = 2*s + 1;
  CHKERRQ(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  CHKERRQ(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm));

  /* rows holds the nc row indices of one grid point; cols holds up to col*col*nc*nc column indices */
  CHKERRQ(PetscMalloc2(nc,&rows,col*col*nc*nc,&cols));
  CHKERRQ(DMGetLocalToGlobalMapping(da,&ltog));

  CHKERRQ(MatSetBlockSize(J,nc));
  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {

    /* x-extent of the stencil at column i: full for periodic, clipped at domain edges otherwise */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

    for (j=ys; j<ys+ny; j++) {
      /* local (ghosted) natural index of grid point (i,j) */
      slot = i - gxs + gnx*(j - gys);

      /* y-extent of the stencil at row j */
      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

      cnt = 0;
      for (k=0; k<nc; k++) {
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc*(slot + gnx*l + p);
            }
          }
        }
        rows[k] = k + nc*(slot);
      }
      CHKERRQ(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
    }
  }
  CHKERRQ(MatSetBlockSize(J,nc));
  /* only the call matching J's actual type (Seq or MPI) takes effect */
  CHKERRQ(MatSeqSELLSetPreallocation(J,0,dnz));
  CHKERRQ(MatMPISELLSetPreallocation(J,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  CHKERRQ(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zero-filled block inserted at every grid point to lock in the sparsity pattern */
    CHKERRQ(PetscCalloc1(col*col*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {

      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

      for (j=ys; j<ys+ny; j++) {
        slot = i - gxs + gnx*(j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

        cnt = 0;
        for (k=0; k<nc; k++) {
          for (l=lstart; l<lend+1; l++) {
            for (p=pstart; p<pend+1; p++) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
                cols[cnt++] = k + nc*(slot + gnx*l + p);
              }
            }
          }
          rows[k] = k + nc*(slot);
        }
        CHKERRQ(MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES));
      }
    }
    CHKERRQ(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    CHKERRQ(MatBindToCPU(J,PETSC_TRUE));
    CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatBindToCPU(J,PETSC_FALSE));
    CHKERRQ(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  CHKERRQ(PetscFree2(rows,cols));
  PetscFunctionReturn(0);
}
981 
/*
  DMCreateMatrix_DA_3d_MPISELL - Preallocates a SELL-format matrix J for a 3d
  DMDA and, unless da->prealloc_only is set, inserts explicit zeros in every
  stencil location so the nonzero pattern is fixed.

  Handles box and star stencils; the stencil at each grid point is clipped at
  non-periodic domain faces.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  CHKERRQ(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st));
  /* stencil diameter in one direction for stencil width s */
  col  = 2*s + 1;
  CHKERRQ(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  CHKERRQ(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm));

  /* rows holds the nc row indices of one grid point; cols holds up to col^3*nc*nc column indices */
  CHKERRQ(PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols));
  CHKERRQ(DMGetLocalToGlobalMapping(da,&ltog));

  CHKERRQ(MatSetBlockSize(J,nc));
  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* stencil extents clipped at non-periodic domain faces */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

        /* local (ghosted) natural index of grid point (i,j,k) */
        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        cnt = 0;
        for (l=0; l<nc; l++) {
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                }
              }
            }
          }
          rows[l] = l + nc*(slot);
        }
        CHKERRQ(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
      }
    }
  }
  CHKERRQ(MatSetBlockSize(J,nc));
  /* only the call matching J's actual type (Seq or MPI) takes effect */
  CHKERRQ(MatSeqSELLSetPreallocation(J,0,dnz));
  CHKERRQ(MatMPISELLSetPreallocation(J,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  CHKERRQ(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zero-filled block inserted at every grid point to lock in the sparsity pattern */
    CHKERRQ(PetscCalloc1(col*col*col*nc*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          cnt = 0;
          for (l=0; l<nc; l++) {
            for (ii=istart; ii<iend+1; ii++) {
              for (jj=jstart; jj<jend+1; jj++) {
                for (kk=kstart; kk<kend+1; kk++) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                    cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  }
                }
              }
            }
            rows[l] = l + nc*(slot);
          }
          CHKERRQ(MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES));
        }
      }
    }
    CHKERRQ(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    CHKERRQ(MatBindToCPU(J,PETSC_TRUE));
    CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatBindToCPU(J,PETSC_FALSE));
    CHKERRQ(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  CHKERRQ(PetscFree2(rows,cols));
  PetscFunctionReturn(0);
}
1094 
/*
  DMCreateMatrix_DA_2d_MPIAIJ - Preallocates an AIJ-format matrix J for a 2d
  DMDA and, unless da->prealloc_only is set, inserts the sparsity pattern
  (values NULL) so new nonzero locations can be disallowed afterwards.

  Handles box and star stencils and periodic/non-periodic boundaries in each
  direction.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
  PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
  MPI_Comm               comm;
  DMBoundaryType         bx,by;
  ISLocalToGlobalMapping ltog,mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE,alreadyboundtocpu = PETSC_TRUE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  CHKERRQ(DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st));
  /* non-periodic grids produce fully sorted column indices; tell the matrix so insertion is faster */
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
    CHKERRQ(MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE));
  }
  col  = 2*s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  CHKERRQ(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  CHKERRQ(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm));

  /* rows holds the nc row indices of one grid point; cols holds up to col*col*nc*nc column indices */
  CHKERRQ(PetscMalloc2(nc,&rows,col*col*nc*nc,&cols));
  CHKERRQ(DMGetLocalToGlobalMapping(da,&ltog));

  CHKERRQ(MatSetBlockSize(J,nc));
  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* x-extent of the stencil: full for periodic, clipped at domain edges otherwise */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

    for (j=ys; j<ys+ny; j++) {
      /* local (ghosted) natural index of grid point (i,j) */
      slot = i - gxs + gnx*(j - gys);

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

      cnt = 0;
      for (k=0; k<nc; k++) {
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc*(slot + gnx*l + p);
            }
          }
        }
        rows[k] = k + nc*(slot);
      }
      if (removedups) {
        CHKERRQ(MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
      } else {
        CHKERRQ(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
      }
    }
  }
  CHKERRQ(MatSetBlockSize(J,nc));
  /* only the call matching J's actual type (Seq or MPI) takes effect */
  CHKERRQ(MatSeqAIJSetPreallocation(J,0,dnz));
  CHKERRQ(MatMPIAIJSetPreallocation(J,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  /* do not overwrite a local-to-global mapping the matrix already has */
  CHKERRQ(MatGetLocalToGlobalMapping(J,&mltog,NULL));
  if (!mltog) {
    CHKERRQ(MatSetLocalToGlobalMapping(J,ltog,ltog));
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i=xs; i<xs+nx; i++) {

      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

      for (j=ys; j<ys+ny; j++) {
        slot = i - gxs + gnx*(j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

        cnt = 0;
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
              /* the nc components of a grid point occupy consecutive columns */
              cols[cnt++] = nc*(slot + gnx*l + p);
              for (k=1; k<nc; k++) {
                cols[cnt] = 1 + cols[cnt-1];cnt++;
              }
            }
          }
        }
        for (k=0; k<nc; k++) rows[k] = k + nc*(slot);
        CHKERRQ(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES));
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    CHKERRQ(MatBoundToCPU(J,&alreadyboundtocpu));
    CHKERRQ(MatBindToCPU(J,PETSC_TRUE));
    CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    /* only unbind if we were the ones who bound it */
    if (!alreadyboundtocpu) CHKERRQ(MatBindToCPU(J,PETSC_FALSE));
    CHKERRQ(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
      CHKERRQ(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
    }
  }
  CHKERRQ(PetscFree2(rows,cols));
  PetscFunctionReturn(0);
}
1216 
1217 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
1218 {
1219   PetscErrorCode         ierr;
1220   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1221   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
1222   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1223   DM_DA                  *dd = (DM_DA*)da->data;
1224   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
1225   MPI_Comm               comm;
1226   DMBoundaryType         bx,by;
1227   ISLocalToGlobalMapping ltog;
1228   DMDAStencilType        st;
1229   PetscBool              removedups = PETSC_FALSE;
1230 
1231   PetscFunctionBegin;
1232   /*
1233          nc - number of components per grid point
1234          col - number of colors needed in one direction for single component problem
1235 
1236   */
1237   CHKERRQ(DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st));
1238   col  = 2*s + 1;
1239   /*
1240        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1241        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1242   */
1243   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1244   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1245   CHKERRQ(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
1246   CHKERRQ(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
1247   CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm));
1248 
1249   CHKERRQ(PetscMalloc1(col*col*nc,&cols));
1250   CHKERRQ(DMGetLocalToGlobalMapping(da,&ltog));
1251 
1252   CHKERRQ(MatSetBlockSize(J,nc));
1253   /* determine the matrix preallocation information */
1254   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1255   for (i=xs; i<xs+nx; i++) {
1256 
1257     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1258     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1259 
1260     for (j=ys; j<ys+ny; j++) {
1261       slot = i - gxs + gnx*(j - gys);
1262 
1263       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1264       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1265 
1266       for (k=0; k<nc; k++) {
1267         cnt = 0;
1268         for (l=lstart; l<lend+1; l++) {
1269           for (p=pstart; p<pend+1; p++) {
1270             if (l || p) {
1271               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1272                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1273               }
1274             } else {
1275               if (dfill) {
1276                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1277               } else {
1278                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1279               }
1280             }
1281           }
1282         }
1283         row    = k + nc*(slot);
1284         maxcnt = PetscMax(maxcnt,cnt);
1285         if (removedups) {
1286           CHKERRQ(MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz));
1287         } else {
1288           CHKERRQ(MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz));
1289         }
1290       }
1291     }
1292   }
1293   CHKERRQ(MatSeqAIJSetPreallocation(J,0,dnz));
1294   CHKERRQ(MatMPIAIJSetPreallocation(J,0,dnz,0,onz));
1295   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1296   CHKERRQ(MatSetLocalToGlobalMapping(J,ltog,ltog));
1297 
1298   /*
1299     For each node in the grid: we get the neighbors in the local (on processor ordering
1300     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1301     PETSc ordering.
1302   */
1303   if (!da->prealloc_only) {
1304     for (i=xs; i<xs+nx; i++) {
1305 
1306       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1307       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1308 
1309       for (j=ys; j<ys+ny; j++) {
1310         slot = i - gxs + gnx*(j - gys);
1311 
1312         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1313         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1314 
1315         for (k=0; k<nc; k++) {
1316           cnt = 0;
1317           for (l=lstart; l<lend+1; l++) {
1318             for (p=pstart; p<pend+1; p++) {
1319               if (l || p) {
1320                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1321                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1322                 }
1323               } else {
1324                 if (dfill) {
1325                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1326                 } else {
1327                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1328                 }
1329               }
1330             }
1331           }
1332           row  = k + nc*(slot);
1333           CHKERRQ(MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES));
1334         }
1335       }
1336     }
1337     /* do not copy values to GPU since they are all zero and not yet needed there */
1338     CHKERRQ(MatBindToCPU(J,PETSC_TRUE));
1339     CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
1340     CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
1341     CHKERRQ(MatBindToCPU(J,PETSC_FALSE));
1342     CHKERRQ(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
1343   }
1344   CHKERRQ(PetscFree(cols));
1345   PetscFunctionReturn(0);
1346 }
1347 
1348 /* ---------------------------------------------------------------------------------*/
1349 
/*
  DMCreateMatrix_DA_3d_MPIAIJ - Preallocates an AIJ-format matrix J for a 3d
  DMDA and, unless da->prealloc_only is set, inserts the sparsity pattern
  (values NULL).

  Note the insertion phase iterates kk/jj/ii (outer to inner) while the
  preallocation phase iterates ii/jj/kk; the insertion order generates column
  indices in increasing (sorted) order, matching MAT_SORTED_FULL when set.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
  MPI_Comm               comm;
  DMBoundaryType         bx,by,bz;
  ISLocalToGlobalMapping ltog,mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  CHKERRQ(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st));
  /* non-periodic grids produce fully sorted column indices; tell the matrix so insertion is faster */
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
    CHKERRQ(MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE));
  }
  col  = 2*s + 1;

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;

  CHKERRQ(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  CHKERRQ(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm));

  /* rows holds the nc row indices of one grid point; cols holds up to col^3*nc*nc column indices */
  CHKERRQ(PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols));
  CHKERRQ(DMGetLocalToGlobalMapping(da,&ltog));

  CHKERRQ(MatSetBlockSize(J,nc));
  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* stencil extents clipped at non-periodic domain faces */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

        /* local (ghosted) natural index of grid point (i,j,k) */
        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        cnt = 0;
        for (l=0; l<nc; l++) {
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                }
              }
            }
          }
          rows[l] = l + nc*(slot);
        }
        if (removedups) {
          CHKERRQ(MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
        } else {
          CHKERRQ(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
        }
      }
    }
  }
  CHKERRQ(MatSetBlockSize(J,nc));
  /* only the call matching J's actual type (Seq or MPI) takes effect */
  CHKERRQ(MatSeqAIJSetPreallocation(J,0,dnz));
  CHKERRQ(MatMPIAIJSetPreallocation(J,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  /* do not overwrite a local-to-global mapping the matrix already has */
  CHKERRQ(MatGetLocalToGlobalMapping(J,&mltog,NULL));
  if (!mltog) {
    CHKERRQ(MatSetLocalToGlobalMapping(J,ltog,ltog));
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          cnt = 0;
          /* kk outermost, ii innermost: visits neighbors in increasing local index order */
          for (kk=kstart; kk<kend+1; kk++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (ii=istart; ii<iend+1; ii++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  /* the nc components of a grid point occupy consecutive columns */
                  cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    for (l=1; l<nc; l++) {
                      cols[cnt] = 1 + cols[cnt-1];cnt++;
                  }
                }
              }
            }
          }
          rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
          CHKERRQ(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    CHKERRQ(MatBindToCPU(J,PETSC_TRUE));
    CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
      CHKERRQ(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
    }
    CHKERRQ(MatBindToCPU(J,PETSC_FALSE));
    CHKERRQ(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  CHKERRQ(PetscFree2(rows,cols));
  PetscFunctionReturn(0);
}
1483 
1484 /* ---------------------------------------------------------------------------------*/
1485 
/*
   Preallocates and zero-fills the Jacobian for a 1d DMDA honoring the block fill
   patterns set with DMDASetBlockFills(): dfill lists which components couple within
   one grid point, ofill which components couple to a neighboring grid point.  Both
   use the modified CSR layout built by DMDASetBlockFills_Private(), so for component
   j the number of couplings is dfill[j+1]-dfill[j] (resp. ofill[j+1]-ofill[j]).
   Only stencil width 1 is supported (checked below).
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
{
  DM_DA                  *dd = (DM_DA*)da->data;
  PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
  PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
  PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog;
  PetscMPIInt            rank,size;

  PetscFunctionBegin;
  CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank));
  CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da),&size));

  /*
         nc - number of components per grid point

  */
  CHKERRQ(DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL));
  PetscCheckFalse(s > 1,PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1");
  CHKERRQ(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL));
  CHKERRQ(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL));

  CHKERRQ(MatSetBlockSize(J,nc));
  /* cols[r]  - predicted nonzeros of local row r inside this process's diagonal block
     ocols[r] - predicted nonzeros of local row r in the off-process (off-diagonal) block */
  CHKERRQ(PetscCalloc2(nx*nc,&cols,nx*nc,&ocols));

  /*
        note should be smaller for first and last process with no periodic
        does not handle dfill
  */
  cnt = 0;
  /* coupling with process to the left */
  for (i=0; i<s; i++) {
    for (j=0; j<nc; j++) {
      ocols[cnt] = ((rank == 0) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
      cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
      if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        /* periodic wrap: left neighbors of the first point live on the last
           process (off-diagonal block) unless there is only one process */
        if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
        else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
      }
      /* maxcnt tracks the widest row; it sizes the column-index work array below */
      maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
      cnt++;
    }
  }
  /* interior rows: all couplings are on-process */
  for (i=s; i<nx-s; i++) {
    for (j=0; j<nc; j++) {
      cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
      maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
      cnt++;
    }
  }
  /* coupling with process to the right */
  for (i=nx-s; i<nx; i++) {
    for (j=0; j<nc; j++) {
      ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
      cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
      if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
        else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
      cnt++;
    }
  }

  CHKERRQ(MatSeqAIJSetPreallocation(J,0,cols));
  CHKERRQ(MatMPIAIJSetPreallocation(J,0,cols,0,ocols));
  CHKERRQ(PetscFree2(cols,ocols));

  CHKERRQ(DMGetLocalToGlobalMapping(da,&ltog));
  CHKERRQ(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    CHKERRQ(PetscMalloc1(maxcnt,&cols));
    row = xs*nc;
    /* coupling with process to the left */
    for (i=xs; i<xs+s; i++) {
      for (j=0; j<nc; j++) {
        cnt = 0;
        if (rank) {
          /* not the leftmost process: grid points with smaller global index exist */
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
          }
        }
        if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* periodic wrap-around to the far end of the domain */
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k];
          }
        }
        if (dfill) {
          for (k=dfill[j]; k<dfill[j+1]; k++) {
            cols[cnt++] = i*nc + dfill[k];
          }
        } else {
          /* no dfill pattern given: couple component j to every component at its own grid point */
          for (k=0; k<nc; k++) {
            cols[cnt++] = i*nc + k;
          }
        }
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
        }
        /* i is a global grid index so cols[] are global indices; hence MatSetValues(),
           not MatSetValuesLocal() */
        CHKERRQ(MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES));
        row++;
      }
    }
    /* interior rows: left neighbors, own point, right neighbors */
    for (i=xs+s; i<xs+nx-s; i++) {
      for (j=0; j<nc; j++) {
        cnt = 0;
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
        }
        if (dfill) {
          for (k=dfill[j]; k<dfill[j+1]; k++) {
            cols[cnt++] = i*nc + dfill[k];
          }
        } else {
          for (k=0; k<nc; k++) {
            cols[cnt++] = i*nc + k;
          }
        }
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
        }
        CHKERRQ(MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES));
        row++;
      }
    }
    /* coupling with process to the right */
    for (i=xs+nx-s; i<xs+nx; i++) {
      for (j=0; j<nc; j++) {
        cnt = 0;
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
        }
        if (dfill) {
          for (k=dfill[j]; k<dfill[j+1]; k++) {
            cols[cnt++] = i*nc + dfill[k];
          }
        } else {
          for (k=0; k<nc; k++) {
            cols[cnt++] = i*nc + k;
          }
        }
        if (rank < size-1) {
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
          }
        }
        if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* wrap-around coupling to the start of the domain; the index formula
             relies on s == 1, which is enforced above */
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k];
          }
        }
        CHKERRQ(MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES));
        row++;
      }
    }
    CHKERRQ(PetscFree(cols));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    CHKERRQ(MatBindToCPU(J,PETSC_TRUE));
    CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatBindToCPU(J,PETSC_FALSE));
    CHKERRQ(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscFunctionReturn(0);
}
1658 
1659 /* ---------------------------------------------------------------------------------*/
1660 
1661 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1662 {
1663   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1664   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1665   PetscInt               istart,iend;
1666   DMBoundaryType         bx;
1667   ISLocalToGlobalMapping ltog,mltog;
1668 
1669   PetscFunctionBegin;
1670   /*
1671          nc - number of components per grid point
1672          col - number of colors needed in one direction for single component problem
1673 
1674   */
1675   CHKERRQ(DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL));
1676   if (bx == DM_BOUNDARY_NONE) {
1677     CHKERRQ(MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE));
1678   }
1679   col  = 2*s + 1;
1680 
1681   CHKERRQ(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL));
1682   CHKERRQ(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL));
1683 
1684   CHKERRQ(MatSetBlockSize(J,nc));
1685   CHKERRQ(MatSeqAIJSetPreallocation(J,col*nc,NULL));
1686   CHKERRQ(MatMPIAIJSetPreallocation(J,col*nc,NULL,col*nc,NULL));
1687 
1688   CHKERRQ(DMGetLocalToGlobalMapping(da,&ltog));
1689   CHKERRQ(MatGetLocalToGlobalMapping(J,&mltog,NULL));
1690   if (!mltog) {
1691     CHKERRQ(MatSetLocalToGlobalMapping(J,ltog,ltog));
1692   }
1693 
1694   /*
1695     For each node in the grid: we get the neighbors in the local (on processor ordering
1696     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1697     PETSc ordering.
1698   */
1699   if (!da->prealloc_only) {
1700     CHKERRQ(PetscMalloc2(nc,&rows,col*nc*nc,&cols));
1701     for (i=xs; i<xs+nx; i++) {
1702       istart = PetscMax(-s,gxs - i);
1703       iend   = PetscMin(s,gxs + gnx - i - 1);
1704       slot   = i - gxs;
1705 
1706       cnt = 0;
1707       for (i1=istart; i1<iend+1; i1++) {
1708         cols[cnt++] = nc*(slot + i1);
1709         for (l=1; l<nc; l++) {
1710           cols[cnt] = 1 + cols[cnt-1];cnt++;
1711         }
1712       }
1713       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1714       CHKERRQ(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES));
1715     }
1716     /* do not copy values to GPU since they are all zero and not yet needed there */
1717     CHKERRQ(MatBindToCPU(J,PETSC_TRUE));
1718     CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
1719     CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
1720     if (bx == DM_BOUNDARY_NONE) {
1721       CHKERRQ(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
1722     }
1723     CHKERRQ(MatBindToCPU(J,PETSC_FALSE));
1724     CHKERRQ(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
1725     CHKERRQ(PetscFree2(rows,cols));
1726   }
1727   PetscFunctionReturn(0);
1728 }
1729 
1730 /* ---------------------------------------------------------------------------------*/
1731 
1732 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da,Mat J)
1733 {
1734   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1735   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1736   PetscInt               istart,iend;
1737   DMBoundaryType         bx;
1738   ISLocalToGlobalMapping ltog,mltog;
1739 
1740   PetscFunctionBegin;
1741   /*
1742          nc - number of components per grid point
1743          col - number of colors needed in one direction for single component problem
1744   */
1745   CHKERRQ(DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL));
1746   col  = 2*s + 1;
1747 
1748   CHKERRQ(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL));
1749   CHKERRQ(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL));
1750 
1751   CHKERRQ(MatSetBlockSize(J,nc));
1752   CHKERRQ(MatSeqAIJSetTotalPreallocation(J,nx*nc*col*nc));
1753 
1754   CHKERRQ(DMGetLocalToGlobalMapping(da,&ltog));
1755   CHKERRQ(MatGetLocalToGlobalMapping(J,&mltog,NULL));
1756   if (!mltog) {
1757     CHKERRQ(MatSetLocalToGlobalMapping(J,ltog,ltog));
1758   }
1759 
1760   /*
1761     For each node in the grid: we get the neighbors in the local (on processor ordering
1762     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1763     PETSc ordering.
1764   */
1765   if (!da->prealloc_only) {
1766     CHKERRQ(PetscMalloc2(nc,&rows,col*nc*nc,&cols));
1767     for (i=xs; i<xs+nx; i++) {
1768       istart = PetscMax(-s,gxs - i);
1769       iend   = PetscMin(s,gxs + gnx - i - 1);
1770       slot   = i - gxs;
1771 
1772       cnt = 0;
1773       for (i1=istart; i1<iend+1; i1++) {
1774         cols[cnt++] = nc*(slot + i1);
1775         for (l=1; l<nc; l++) {
1776           cols[cnt] = 1 + cols[cnt-1];cnt++;
1777         }
1778       }
1779       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1780       CHKERRQ(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES));
1781     }
1782     /* do not copy values to GPU since they are all zero and not yet needed there */
1783     CHKERRQ(MatBindToCPU(J,PETSC_TRUE));
1784     CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
1785     CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
1786     if (bx == DM_BOUNDARY_NONE) {
1787       CHKERRQ(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
1788     }
1789     CHKERRQ(MatBindToCPU(J,PETSC_FALSE));
1790     CHKERRQ(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
1791     CHKERRQ(PetscFree2(rows,cols));
1792   }
1793   CHKERRQ(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
1794   PetscFunctionReturn(0);
1795 }
1796 
/*
   Preallocates and zero-fills a (MPI)BAIJ Jacobian for a 2d DMDA; block size is
   nc (the number of components per grid point), so the stencil loops below work
   with block (grid-point) indices rather than scalar rows.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,ii,jj;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  CHKERRQ(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st));
  col  = 2*s + 1;

  CHKERRQ(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  CHKERRQ(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm));

  /* work array of block-column indices for one block row */
  CHKERRQ(PetscMalloc1(col*col*nc*nc,&cols));

  CHKERRQ(DMGetLocalToGlobalMapping(da,&ltog));

  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* clamp the stencil to the physical domain unless periodic in that direction */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      /* local (ghosted) block index of grid point (i,j) */
      slot   = i - gxs + gnx*(j - gys);

      /* Find block columns in block row */
      cnt = 0;
      for (ii=istart; ii<iend+1; ii++) {
        for (jj=jstart; jj<jend+1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
            cols[cnt++] = slot + ii + gnx*jj;
          }
        }
      }
      CHKERRQ(MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz));
    }
  }
  CHKERRQ(MatSeqBAIJSetPreallocation(J,nc,0,dnz));
  CHKERRQ(MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  CHKERRQ(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zero blocks so the nonzero pattern is fixed before assembly */
    CHKERRQ(PetscCalloc1(col*col*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        slot = i - gxs + gnx*(j - gys);
        cnt  = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
              cols[cnt++] = slot + ii + gnx*jj;
            }
          }
        }
        CHKERRQ(MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES));
      }
    }
    CHKERRQ(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    CHKERRQ(MatBindToCPU(J,PETSC_TRUE));
    CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatBindToCPU(J,PETSC_FALSE));
    CHKERRQ(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  CHKERRQ(PetscFree(cols));
  PetscFunctionReturn(0);
}
1889 
/*
   Preallocates and zero-fills a (MPI)BAIJ Jacobian for a 3d DMDA; block size is
   nc (the number of components per grid point), so the stencil loops below work
   with block (grid-point) indices rather than scalar rows.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  CHKERRQ(DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st));
  col  = 2*s + 1;

  CHKERRQ(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  CHKERRQ(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm));

  /* work array of block-column indices for one block row */
  CHKERRQ(PetscMalloc1(col*col*col,&cols));

  CHKERRQ(DMGetLocalToGlobalMapping(da,&ltog));

  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* clamp the stencil to the physical domain unless periodic in that direction */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

        /* local (ghosted) block index of grid point (i,j,k) */
        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            for (kk=kstart; kk<kend+1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
              }
            }
          }
        }
        CHKERRQ(MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz));
      }
    }
  }
  CHKERRQ(MatSeqBAIJSetPreallocation(J,nc,0,dnz));
  CHKERRQ(MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  CHKERRQ(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zero blocks so the nonzero pattern is fixed before assembly */
    CHKERRQ(PetscCalloc1(col*col*col*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          cnt = 0;
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
                }
              }
            }
          }
          CHKERRQ(MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES));
        }
      }
    }
    CHKERRQ(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    CHKERRQ(MatBindToCPU(J,PETSC_TRUE));
    CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatBindToCPU(J,PETSC_FALSE));
    CHKERRQ(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  CHKERRQ(PetscFree(cols));
  PetscFunctionReturn(0);
}
1997 }
1998 
1999 /*
2000   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
2001   identify in the local ordering with periodic domain.
2002 */
2003 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
2004 {
2005   PetscInt       i,n;
2006 
2007   PetscFunctionBegin;
2008   CHKERRQ(ISLocalToGlobalMappingApplyBlock(ltog,1,row,row));
2009   CHKERRQ(ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col));
2010   for (i=0,n=0; i<*cnt; i++) {
2011     if (col[i] >= *row) col[n++] = col[i];
2012   }
2013   *cnt = n;
2014   PetscFunctionReturn(0);
2015 }
2016 
/*
   Preallocates and zero-fills a (MPI)SBAIJ Jacobian for a 2d DMDA.  SBAIJ stores only
   the upper triangle, so L2GFilterUpperTriangular() first maps the candidate block
   columns to global block indices and drops those below the diagonal; the global
   (non-Local) MatPreallocateSymmetricSetBlock()/MatSetValuesBlocked() variants are
   therefore used afterwards.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,ii,jj;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  CHKERRQ(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st));
  col  = 2*s + 1;

  CHKERRQ(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  CHKERRQ(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm));

  /* work array of block-column indices for one block row */
  CHKERRQ(PetscMalloc1(col*col*nc*nc,&cols));

  CHKERRQ(DMGetLocalToGlobalMapping(da,&ltog));

  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* clamp the stencil to the physical domain unless periodic in that direction */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      /* local (ghosted) block index of grid point (i,j) */
      slot   = i - gxs + gnx*(j - gys);

      /* Find block columns in block row */
      cnt = 0;
      for (ii=istart; ii<iend+1; ii++) {
        for (jj=jstart; jj<jend+1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) {
            cols[cnt++] = slot + ii + gnx*jj;
          }
        }
      }
      /* slot and cols[] become global block indices and lower-triangular columns are dropped */
      CHKERRQ(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols));
      CHKERRQ(MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz));
    }
  }
  CHKERRQ(MatSeqSBAIJSetPreallocation(J,nc,0,dnz));
  CHKERRQ(MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  CHKERRQ(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zero blocks so the nonzero pattern is fixed before assembly */
    CHKERRQ(PetscCalloc1(col*col*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        slot   = i - gxs + gnx*(j - gys);

        /* Find block columns in block row */
        cnt = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) {
              cols[cnt++] = slot + ii + gnx*jj;
            }
          }
        }
        CHKERRQ(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols));
        /* indices are global after the filter, hence MatSetValuesBlocked() not ...Local() */
        CHKERRQ(MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES));
      }
    }
    CHKERRQ(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    CHKERRQ(MatBindToCPU(J,PETSC_TRUE));
    CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatBindToCPU(J,PETSC_FALSE));
    CHKERRQ(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  CHKERRQ(PetscFree(cols));
  PetscFunctionReturn(0);
}
2113 
/*
   Preallocates and zero-fills a (MPI)SBAIJ Jacobian for a 3d DMDA.  SBAIJ stores only
   the upper triangle, so L2GFilterUpperTriangular() first maps the candidate block
   columns to global block indices and drops those below the diagonal; the global
   (non-Local) MatPreallocateSymmetricSetBlock()/MatSetValuesBlocked() variants are
   therefore used afterwards.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  CHKERRQ(DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st));
  col  = 2*s + 1;

  CHKERRQ(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  CHKERRQ(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm));

  /* work array of block-column indices for one block row */
  CHKERRQ(PetscMalloc1(col*col*col,&cols));

  CHKERRQ(DMGetLocalToGlobalMapping(da,&ltog));

  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* clamp the stencil to the physical domain unless periodic in that direction */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

        /* local (ghosted) block index of grid point (i,j,k) */
        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            for (kk=kstart; kk<kend+1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
                cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
              }
            }
          }
        }
        /* slot and cols[] become global block indices and lower-triangular columns are dropped */
        CHKERRQ(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols));
        CHKERRQ(MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz));
      }
    }
  }
  CHKERRQ(MatSeqSBAIJSetPreallocation(J,nc,0,dnz));
  CHKERRQ(MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  CHKERRQ(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zero blocks so the nonzero pattern is fixed before assembly */
    CHKERRQ(PetscCalloc1(col*col*col*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          cnt = 0;
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
                  cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
                }
              }
            }
          }
          CHKERRQ(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols));
          /* indices are global after the filter, hence MatSetValuesBlocked() not ...Local() */
          CHKERRQ(MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES));
        }
      }
    }
    CHKERRQ(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    CHKERRQ(MatBindToCPU(J,PETSC_TRUE));
    CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatBindToCPU(J,PETSC_FALSE));
    CHKERRQ(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  CHKERRQ(PetscFree(cols));
  PetscFunctionReturn(0);
}
2223 }
2224 
2225 /* ---------------------------------------------------------------------------------*/
2226 
/*
  DMCreateMatrix_DA_3d_MPIAIJ_Fill - Preallocates an MPIAIJ (or SeqAIJ) matrix for a
  3d DMDA using the user-provided component-coupling ("fill") patterns, and, unless
  da->prealloc_only is set, inserts explicit zeros to lock in the nonzero structure
  and assembles the matrix.

  The fill patterns are sparse row maps over the nc components at each grid point:
  dd->dfill describes which components couple to which at the SAME grid point
  (the "diagonal" block) and dd->ofill describes the coupling to each NEIGHBORING
  stencil point (the "off-diagonal" blocks).  If dfill is NULL, full nc x nc
  coupling is assumed at the center point.

  Works in two identical passes over the owned grid points:
    1. count nonzeros per local row into dnz/onz and preallocate,
    2. insert zeros at exactly the same locations and assemble,
  so that any later insertion outside this pattern errors
  (MAT_NEW_NONZERO_LOCATION_ERR is set at the end).
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
  DM_DA                  *dd = (DM_DA*)da->data;
  PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  CHKERRQ(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st));
  /* col = width of the stencil footprint in one direction (2*stencil_width + 1) */
  col  = 2*s + 1;
  PetscCheckFalse(bx == DM_BOUNDARY_PERIODIC && (m % col),PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
                 by 2*stencil_width + 1\n");
  PetscCheckFalse(by == DM_BOUNDARY_PERIODIC && (n % col),PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
                 by 2*stencil_width + 1\n");
  PetscCheckFalse(bz == DM_BOUNDARY_PERIODIC && (p % col),PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
                 by 2*stencil_width + 1\n");

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;

  CHKERRQ(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  CHKERRQ(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  CHKERRQ(PetscObjectGetComm((PetscObject)da,&comm));

  /* cols is sized for the worst case: every component at every point of a full box stencil */
  CHKERRQ(PetscMalloc1(col*col*col*nc,&cols));
  CHKERRQ(DMGetLocalToGlobalMapping(da,&ltog));

  /* determine the matrix preallocation information */
  /* note: MatPreallocateInitialize/Finalize are macros that allocate and free dnz/onz
     and declare internal locals, hence the explicit ierr = ...;CHKERRQ(ierr) form */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);

  CHKERRQ(MatSetBlockSize(J,nc));
  /* pass 1: count the column indices of every local row into dnz (diagonal block) / onz (off-diagonal) */
  for (i=xs; i<xs+nx; i++) {
    /* clip the stencil range at non-periodic physical boundaries; periodic keeps the full range */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

        /* slot = index of grid point (i,j,k) in the local ghosted (natural) ordering */
        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        for (l=0; l<nc; l++) {
          cnt = 0;
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if (ii || jj || kk) {
                  /* neighbor point: for a star stencil only axis-aligned offsets are kept */
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                    /* couple component l only to the components listed in row l of ofill */
                    for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  }
                } else {
                  /* center point: use dfill row l if given, otherwise couple to all nc components */
                  if (dfill) {
                    for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  } else {
                    for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  }
                }
              }
            }
          }
          row  = l + nc*(slot);
          /* track the widest row so the zero-valued insert buffer in pass 2 is large enough */
          maxcnt = PetscMax(maxcnt,cnt);
          if (removedups) {
            CHKERRQ(MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz));
          } else {
            CHKERRQ(MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz));
          }
        }
      }
    }
  }
  /* only one of these two takes effect, depending on the actual matrix type of J */
  CHKERRQ(MatSeqAIJSetPreallocation(J,0,dnz));
  CHKERRQ(MatMPIAIJSetPreallocation(J,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  CHKERRQ(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* pass 2: insert explicit zeros at exactly the locations counted above, then assemble */
    CHKERRQ(PetscCalloc1(maxcnt,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          for (l=0; l<nc; l++) {
            cnt = 0;
            /* column generation must mirror pass 1 exactly, or insertions fall outside the preallocation */
            for (ii=istart; ii<iend+1; ii++) {
              for (jj=jstart; jj<jend+1; jj++) {
                for (kk=kstart; kk<kend+1; kk++) {
                  if (ii || jj || kk) {
                    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                      for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    }
                  } else {
                    if (dfill) {
                      for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    } else {
                      for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    }
                  }
                }
              }
            }
            row  = l + nc*(slot);
            CHKERRQ(MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES));
          }
        }
      }
    }
    CHKERRQ(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    CHKERRQ(MatBindToCPU(J,PETSC_TRUE));
    CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatBindToCPU(J,PETSC_FALSE));
    /* freeze the nonzero structure: future insertions at new locations will raise an error */
    CHKERRQ(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  CHKERRQ(PetscFree(cols));
  PetscFunctionReturn(0);
}
2378