xref: /petsc/src/dm/impls/da/fdda.c (revision ae8b063e4f7e8ac6d2de6909aa6d539b126b38d9)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 #include <petscbt.h>
5 
6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
10 
11 /*
12    For ghost i that may be negative or greater than the upper bound this
13   maps it into the 0:m-1 range using periodicity
14 */
15 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))
16 
17 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
18 {
19   PetscErrorCode ierr;
20   PetscInt       i,j,nz,*fill;
21 
22   PetscFunctionBegin;
23   if (!dfill) PetscFunctionReturn(0);
24 
25   /* count number nonzeros */
26   nz = 0;
27   for (i=0; i<w; i++) {
28     for (j=0; j<w; j++) {
29       if (dfill[w*i+j]) nz++;
30     }
31   }
32   ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr);
33   /* construct modified CSR storage of nonzero structure */
34   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
35    so fill[1] - fill[0] gives number of nonzeros in first row etc */
36   nz = w + 1;
37   for (i=0; i<w; i++) {
38     fill[i] = nz;
39     for (j=0; j<w; j++) {
40       if (dfill[w*i+j]) {
41         fill[nz] = j;
42         nz++;
43       }
44     }
45   }
46   fill[w] = nz;
47 
48   *rfill = fill;
49   PetscFunctionReturn(0);
50 }
51 
52 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill)
53 {
54   PetscErrorCode ierr;
55   PetscInt       nz;
56 
57   PetscFunctionBegin;
58   if (!dfillsparse) PetscFunctionReturn(0);
59 
60   /* Determine number of non-zeros */
61   nz = (dfillsparse[w] - w - 1);
62 
63   /* Allocate space for our copy of the given sparse matrix representation. */
64   ierr = PetscMalloc1(nz + w + 1,rfill);CHKERRQ(ierr);
65   ierr = PetscArraycpy(*rfill,dfillsparse,nz+w+1);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
69 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
70 {
71   PetscErrorCode ierr;
72   PetscInt       i,k,cnt = 1;
73 
74   PetscFunctionBegin;
75 
76   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
77    columns to the left with any nonzeros in them plus 1 */
78   ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr);
79   for (i=0; i<dd->w; i++) {
80     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
81   }
82   for (i=0; i<dd->w; i++) {
83     if (dd->ofillcols[i]) {
84       dd->ofillcols[i] = cnt++;
85     }
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 /*@
91     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
92     of the matrix returned by DMCreateMatrix().
93 
94     Logically Collective on da
95 
96     Input Parameters:
97 +   da - the distributed array
98 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
99 -   ofill - the fill pattern in the off-diagonal blocks
100 
101     Level: developer
102 
103     Notes:
104     This only makes sense when you are doing multicomponent problems but using the
105        MPIAIJ matrix format
106 
107            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
108        representing coupling and 0 entries for missing coupling. For example
109 $             dfill[9] = {1, 0, 0,
110 $                         1, 1, 0,
111 $                         0, 1, 1}
112        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
113        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
114        diagonal block).
115 
116      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
117      can be represented in the dfill, ofill format
118 
119    Contributed by Glenn Hammond
120 
121 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
122 
123 @*/
124 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
125 {
126   DM_DA          *dd = (DM_DA*)da->data;
127   PetscErrorCode ierr;
128 
129   PetscFunctionBegin;
130   /* save the given dfill and ofill information */
131   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
132   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
133 
134   /* count nonzeros in ofill columns */
135   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
136 
137   PetscFunctionReturn(0);
138 }
139 
140 /*@
141     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
142     of the matrix returned by DMCreateMatrix(), using sparse representations
143     of fill patterns.
144 
145     Logically Collective on da
146 
147     Input Parameters:
148 +   da - the distributed array
149 .   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
150 -   ofill - the sparse fill pattern in the off-diagonal blocks
151 
152     Level: developer
153 
154     Notes: This only makes sense when you are doing multicomponent problems but using the
155        MPIAIJ matrix format
156 
157            The format for dfill and ofill is a sparse representation of a
158            dof-by-dof matrix with 1 entries representing coupling and 0 entries
159            for missing coupling.  The sparse representation is a 1 dimensional
160            array of length nz + dof + 1, where nz is the number of non-zeros in
161            the matrix.  The first dof entries in the array give the
162            starting array indices of each row's items in the rest of the array,
163            the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
164            and the remaining nz items give the column indices of each of
165            the 1s within the logical 2D matrix.  Each row's items within
166            the array are the column indices of the 1s within that row
167            of the 2D matrix.  PETSc developers may recognize that this is the
168            same format as that computed by the DMDASetBlockFills_Private()
169            function from a dense 2D matrix representation.
170 
171      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
172      can be represented in the dfill, ofill format
173 
174    Contributed by Philip C. Roth
175 
176 .seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
177 
178 @*/
179 PetscErrorCode  DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
180 {
181   DM_DA          *dd = (DM_DA*)da->data;
182   PetscErrorCode ierr;
183 
184   PetscFunctionBegin;
185   /* save the given dfill and ofill information */
186   ierr = DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);CHKERRQ(ierr);
187   ierr = DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);CHKERRQ(ierr);
188 
189   /* count nonzeros in ofill columns */
190   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
191 
192   PetscFunctionReturn(0);
193 }
194 
195 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
196 {
197   PetscErrorCode   ierr;
198   PetscInt         dim,m,n,p,nc;
199   DMBoundaryType   bx,by,bz;
200   MPI_Comm         comm;
201   PetscMPIInt      size;
202   PetscBool        isBAIJ;
203   DM_DA            *dd = (DM_DA*)da->data;
204 
205   PetscFunctionBegin;
206   /*
207                                   m
208           ------------------------------------------------------
209          |                                                     |
210          |                                                     |
211          |               ----------------------                |
212          |               |                    |                |
213       n  |           yn  |                    |                |
214          |               |                    |                |
215          |               .---------------------                |
216          |             (xs,ys)     xn                          |
217          |            .                                        |
218          |         (gxs,gys)                                   |
219          |                                                     |
220           -----------------------------------------------------
221   */
222 
223   /*
224          nc - number of components per grid point
225          col - number of colors needed in one direction for single component problem
226 
227   */
228   ierr = DMDAGetInfo(da,&dim,NULL,NULL,NULL,&m,&n,&p,&nc,NULL,&bx,&by,&bz,NULL);CHKERRQ(ierr);
229 
230   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
231   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
232   if (ctype == IS_COLORING_LOCAL) {
233     if (size == 1) {
234       ctype = IS_COLORING_GLOBAL;
235     } else if (dim > 1) {
236       if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
237         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
238       }
239     }
240   }
241 
242   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
243      matrices is for the blocks, not the individual matrix elements  */
244   ierr = PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
245   if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
246   if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
247   if (isBAIJ) {
248     dd->w  = 1;
249     dd->xs = dd->xs/nc;
250     dd->xe = dd->xe/nc;
251     dd->Xs = dd->Xs/nc;
252     dd->Xe = dd->Xe/nc;
253   }
254 
255   /*
256      We do not provide a getcoloring function in the DMDA operations because
257    the basic DMDA does not know about matrices. We think of DMDA as being
258    more low-level then matrices.
259   */
260   if (dim == 1) {
261     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
262   } else if (dim == 2) {
263     ierr = DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
264   } else if (dim == 3) {
265     ierr = DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
266   } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
267   if (isBAIJ) {
268     dd->w  = nc;
269     dd->xs = dd->xs*nc;
270     dd->xe = dd->xe*nc;
271     dd->Xs = dd->Xs*nc;
272     dd->Xe = dd->Xe*nc;
273   }
274   PetscFunctionReturn(0);
275 }
276 
277 /* ---------------------------------------------------------------------------------*/
278 
279 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
280 {
281   PetscErrorCode  ierr;
282   PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
283   PetscInt        ncolors;
284   MPI_Comm        comm;
285   DMBoundaryType  bx,by;
286   DMDAStencilType st;
287   ISColoringValue *colors;
288   DM_DA           *dd = (DM_DA*)da->data;
289 
290   PetscFunctionBegin;
291   /*
292          nc - number of components per grid point
293          col - number of colors needed in one direction for single component problem
294 
295   */
296   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
297   col  = 2*s + 1;
298   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
299   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
300   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
301 
302   /* special case as taught to us by Paul Hovland */
303   if (st == DMDA_STENCIL_STAR && s == 1) {
304     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
305   } else {
306     if (ctype == IS_COLORING_GLOBAL) {
307       if (!dd->localcoloring) {
308         ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
309         ii   = 0;
310         for (j=ys; j<ys+ny; j++) {
311           for (i=xs; i<xs+nx; i++) {
312             for (k=0; k<nc; k++) {
313               colors[ii++] = k + nc*((i % col) + col*(j % col));
314             }
315           }
316         }
317         ncolors = nc + nc*(col-1 + col*(col-1));
318         ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
319       }
320       *coloring = dd->localcoloring;
321     } else if (ctype == IS_COLORING_LOCAL) {
322       if (!dd->ghostedcoloring) {
323         ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
324         ii   = 0;
325         for (j=gys; j<gys+gny; j++) {
326           for (i=gxs; i<gxs+gnx; i++) {
327             for (k=0; k<nc; k++) {
328               /* the complicated stuff is to handle periodic boundaries */
329               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
330             }
331           }
332         }
333         ncolors = nc + nc*(col - 1 + col*(col-1));
334         ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
335         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
336 
337         ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
338       }
339       *coloring = dd->ghostedcoloring;
340     } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
341   }
342   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
343   PetscFunctionReturn(0);
344 }
345 
346 /* ---------------------------------------------------------------------------------*/
347 
348 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
349 {
350   PetscErrorCode  ierr;
351   PetscInt        xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
352   PetscInt        ncolors;
353   MPI_Comm        comm;
354   DMBoundaryType  bx,by,bz;
355   DMDAStencilType st;
356   ISColoringValue *colors;
357   DM_DA           *dd = (DM_DA*)da->data;
358 
359   PetscFunctionBegin;
360   /*
361          nc - number of components per grid point
362          col - number of colors needed in one direction for single component problem
363 
364   */
365   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
366   col  = 2*s + 1;
367   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
368   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
369   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
370 
371   /* create the coloring */
372   if (ctype == IS_COLORING_GLOBAL) {
373     if (!dd->localcoloring) {
374       ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr);
375       ii   = 0;
376       for (k=zs; k<zs+nz; k++) {
377         for (j=ys; j<ys+ny; j++) {
378           for (i=xs; i<xs+nx; i++) {
379             for (l=0; l<nc; l++) {
380               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
381             }
382           }
383         }
384       }
385       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
386       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
387     }
388     *coloring = dd->localcoloring;
389   } else if (ctype == IS_COLORING_LOCAL) {
390     if (!dd->ghostedcoloring) {
391       ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr);
392       ii   = 0;
393       for (k=gzs; k<gzs+gnz; k++) {
394         for (j=gys; j<gys+gny; j++) {
395           for (i=gxs; i<gxs+gnx; i++) {
396             for (l=0; l<nc; l++) {
397               /* the complicated stuff is to handle periodic boundaries */
398               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
399             }
400           }
401         }
402       }
403       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
404       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
405       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
406     }
407     *coloring = dd->ghostedcoloring;
408   } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
409   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
410   PetscFunctionReturn(0);
411 }
412 
413 /* ---------------------------------------------------------------------------------*/
414 
415 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
416 {
417   PetscErrorCode  ierr;
418   PetscInt        xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
419   PetscInt        ncolors;
420   MPI_Comm        comm;
421   DMBoundaryType  bx;
422   ISColoringValue *colors;
423   DM_DA           *dd = (DM_DA*)da->data;
424 
425   PetscFunctionBegin;
426   /*
427          nc - number of components per grid point
428          col - number of colors needed in one direction for single component problem
429 
430   */
431   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,&M,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
432   col  = 2*s + 1;
433   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
434   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
435   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
436 
437   /* create the coloring */
438   if (ctype == IS_COLORING_GLOBAL) {
439     if (!dd->localcoloring) {
440       ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr);
441       if (dd->ofillcols) {
442         PetscInt tc = 0;
443         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
444         i1 = 0;
445         for (i=xs; i<xs+nx; i++) {
446           for (l=0; l<nc; l++) {
447             if (dd->ofillcols[l] && (i % col)) {
448               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
449             } else {
450               colors[i1++] = l;
451             }
452           }
453         }
454         ncolors = nc + 2*s*tc;
455       } else {
456         i1 = 0;
457         for (i=xs; i<xs+nx; i++) {
458           for (l=0; l<nc; l++) {
459             colors[i1++] = l + nc*(i % col);
460           }
461         }
462         ncolors = nc + nc*(col-1);
463       }
464       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
465     }
466     *coloring = dd->localcoloring;
467   } else if (ctype == IS_COLORING_LOCAL) {
468     if (!dd->ghostedcoloring) {
469       ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr);
470       i1   = 0;
471       for (i=gxs; i<gxs+gnx; i++) {
472         for (l=0; l<nc; l++) {
473           /* the complicated stuff is to handle periodic boundaries */
474           colors[i1++] = l + nc*(SetInRange(i,m) % col);
475         }
476       }
477       ncolors = nc + nc*(col-1);
478       ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
479       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
480     }
481     *coloring = dd->ghostedcoloring;
482   } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
483   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
484   PetscFunctionReturn(0);
485 }
486 
487 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
488 {
489   PetscErrorCode  ierr;
490   PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
491   PetscInt        ncolors;
492   MPI_Comm        comm;
493   DMBoundaryType  bx,by;
494   ISColoringValue *colors;
495   DM_DA           *dd = (DM_DA*)da->data;
496 
497   PetscFunctionBegin;
498   /*
499          nc - number of components per grid point
500          col - number of colors needed in one direction for single component problem
501 
502   */
503   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,NULL);CHKERRQ(ierr);
504   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
505   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
506   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
507   /* create the coloring */
508   if (ctype == IS_COLORING_GLOBAL) {
509     if (!dd->localcoloring) {
510       ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
511       ii   = 0;
512       for (j=ys; j<ys+ny; j++) {
513         for (i=xs; i<xs+nx; i++) {
514           for (k=0; k<nc; k++) {
515             colors[ii++] = k + nc*((3*j+i) % 5);
516           }
517         }
518       }
519       ncolors = 5*nc;
520       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
521     }
522     *coloring = dd->localcoloring;
523   } else if (ctype == IS_COLORING_LOCAL) {
524     if (!dd->ghostedcoloring) {
525       ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
526       ii = 0;
527       for (j=gys; j<gys+gny; j++) {
528         for (i=gxs; i<gxs+gnx; i++) {
529           for (k=0; k<nc; k++) {
530             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
531           }
532         }
533       }
534       ncolors = 5*nc;
535       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
536       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
537     }
538     *coloring = dd->ghostedcoloring;
539   } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
540   PetscFunctionReturn(0);
541 }
542 
543 /* =========================================================================== */
544 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
545 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
546 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM,Mat);
547 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
548 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
549 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
550 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
551 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
552 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
553 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
554 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
555 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
556 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
557 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);
558 
559 /*@C
560    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
561 
562    Logically Collective on mat
563 
564    Input Parameters:
565 +  mat - the matrix
566 -  da - the da
567 
568    Level: intermediate
569 
570 @*/
571 PetscErrorCode MatSetupDM(Mat mat,DM da)
572 {
573   PetscErrorCode ierr;
574 
575   PetscFunctionBegin;
576   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
577   PetscValidHeaderSpecificType(da,DM_CLASSID,2,DMDA);
578   ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
579   PetscFunctionReturn(0);
580 }
581 
582 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
583 {
584   DM                da;
585   PetscErrorCode    ierr;
586   const char        *prefix;
587   Mat               Anatural;
588   AO                ao;
589   PetscInt          rstart,rend,*petsc,i;
590   IS                is;
591   MPI_Comm          comm;
592   PetscViewerFormat format;
593 
594   PetscFunctionBegin;
595   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
596   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
597   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
598 
599   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
600   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
601   PetscCheck(da,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
602 
603   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
604   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
605   ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr);
606   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
607   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
608   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
609 
610   /* call viewer on natural ordering */
611   ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
612   ierr = ISDestroy(&is);CHKERRQ(ierr);
613   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
614   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
615   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
616   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
617   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
618   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
619   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
620   PetscFunctionReturn(0);
621 }
622 
623 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
624 {
625   DM             da;
626   PetscErrorCode ierr;
627   Mat            Anatural,Aapp;
628   AO             ao;
629   PetscInt       rstart,rend,*app,i,m,n,M,N;
630   IS             is;
631   MPI_Comm       comm;
632 
633   PetscFunctionBegin;
634   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
635   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
636   PetscCheck(da,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
637 
638   /* Load the matrix in natural ordering */
639   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
640   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
641   ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
642   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
643   ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr);
644   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
645 
646   /* Map natural ordering to application ordering and create IS */
647   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
648   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
649   ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr);
650   for (i=rstart; i<rend; i++) app[i-rstart] = i;
651   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
652   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
653 
654   /* Do permutation and replace header */
655   ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
656   ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr);
657   ierr = ISDestroy(&is);CHKERRQ(ierr);
658   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
659   PetscFunctionReturn(0);
660 }
661 
662 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
663 {
664   PetscErrorCode ierr;
665   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
666   Mat            A;
667   MPI_Comm       comm;
668   MatType        Atype;
669   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
670   MatType        mtype;
671   PetscMPIInt    size;
672   DM_DA          *dd = (DM_DA*)da->data;
673 
674   PetscFunctionBegin;
675   ierr = MatInitializePackage();CHKERRQ(ierr);
676   mtype = da->mattype;
677 
678   /*
679                                   m
680           ------------------------------------------------------
681          |                                                     |
682          |                                                     |
683          |               ----------------------                |
684          |               |                    |                |
685       n  |           ny  |                    |                |
686          |               |                    |                |
687          |               .---------------------                |
688          |             (xs,ys)     nx                          |
689          |            .                                        |
690          |         (gxs,gys)                                   |
691          |                                                     |
692           -----------------------------------------------------
693   */
694 
695   /*
696          nc - number of components per grid point
697          col - number of colors needed in one direction for single component problem
698 
699   */
700   M   = dd->M;
701   N   = dd->N;
702   P   = dd->P;
703   dim = da->dim;
704   dof = dd->w;
705   /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); */
706   ierr = DMDAGetCorners(da,NULL,NULL,NULL,&nx,&ny,&nz);CHKERRQ(ierr);
707   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
708   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
709   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
710   ierr = MatSetType(A,mtype);CHKERRQ(ierr);
711   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
712   if (dof*nx*ny*nz < da->bind_below) {
713     ierr = MatSetBindingPropagates(A,PETSC_TRUE);CHKERRQ(ierr);
714     ierr = MatBindToCPU(A,PETSC_TRUE);CHKERRQ(ierr);
715   }
716   ierr = MatSetDM(A,da);CHKERRQ(ierr);
717   if (da->structure_only) {
718     ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr);
719   }
720   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
721   /*
722      We do not provide a getmatrix function in the DMDA operations because
723    the basic DMDA does not know about matrices. We think of DMDA as being more
724    more low-level than matrices. This is kind of cheating but, cause sometimes
725    we think of DMDA has higher level than matrices.
726 
727      We could switch based on Atype (or mtype), but we do not since the
728    specialized setting routines depend only on the particular preallocation
729    details of the matrix, not the type itself.
730   */
731   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
732   if (!aij) {
733     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
734   }
735   if (!aij) {
736     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
737     if (!baij) {
738       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
739     }
740     if (!baij) {
741       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
742       if (!sbaij) {
743         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
744       }
745       if (!sbaij) {
746         ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);CHKERRQ(ierr);
747         if (!sell) {
748           ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);CHKERRQ(ierr);
749         }
750       }
751       if (!sell) {
752         ierr = PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);CHKERRQ(ierr);
753       }
754     }
755   }
756   if (aij) {
757     if (dim == 1) {
758       if (dd->ofill) {
759         ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
760       } else {
761         DMBoundaryType bx;
762         PetscMPIInt  size;
763         ierr = DMDAGetInfo(da,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
764         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRMPI(ierr);
765         if (size == 1 && bx == DM_BOUNDARY_NONE) {
766           ierr = DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da,A);CHKERRQ(ierr);
767         } else {
768           ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
769         }
770       }
771     } else if (dim == 2) {
772       if (dd->ofill) {
773         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
774       } else {
775         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
776       }
777     } else if (dim == 3) {
778       if (dd->ofill) {
779         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
780       } else {
781         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
782       }
783     }
784   } else if (baij) {
785     if (dim == 2) {
786       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
787     } else if (dim == 3) {
788       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
789     } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
790   } else if (sbaij) {
791     if (dim == 2) {
792       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
793     } else if (dim == 3) {
794       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
795     } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
796   } else if (sell) {
797      if (dim == 2) {
798        ierr = DMCreateMatrix_DA_2d_MPISELL(da,A);CHKERRQ(ierr);
799      } else if (dim == 3) {
800        ierr = DMCreateMatrix_DA_3d_MPISELL(da,A);CHKERRQ(ierr);
801      } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
802   } else if (is) {
803     ierr = DMCreateMatrix_DA_IS(da,A);CHKERRQ(ierr);
804   } else {
805     ISLocalToGlobalMapping ltog;
806 
807     ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr);
808     ierr = MatSetUp(A);CHKERRQ(ierr);
809     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
810     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
811   }
812   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
813   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
814   ierr = MatSetDM(A,da);CHKERRQ(ierr);
815   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
816   if (size > 1) {
817     /* change viewer to display matrix in natural ordering */
818     ierr = MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
819     ierr = MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
820   }
821   *J = A;
822   PetscFunctionReturn(0);
823 }
824 
825 /* ---------------------------------------------------------------------------------*/
826 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
827 
828 PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
829 {
830   DM_DA                  *da = (DM_DA*)dm->data;
831   Mat                    lJ,P;
832   ISLocalToGlobalMapping ltog;
833   IS                     is;
834   PetscBT                bt;
835   const PetscInt         *e_loc,*idx;
836   PetscInt               i,nel,nen,nv,dof,*gidx,n,N;
837   PetscErrorCode         ierr;
838 
839   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
840      We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
841   PetscFunctionBegin;
842   dof  = da->w;
843   ierr = MatSetBlockSize(J,dof);CHKERRQ(ierr);
844   ierr = DMGetLocalToGlobalMapping(dm,&ltog);CHKERRQ(ierr);
845 
846   /* flag local elements indices in local DMDA numbering */
847   ierr = ISLocalToGlobalMappingGetSize(ltog,&nv);CHKERRQ(ierr);
848   ierr = PetscBTCreate(nv/dof,&bt);CHKERRQ(ierr);
849   ierr = DMDAGetElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
850   for (i=0;i<nel*nen;i++) { ierr = PetscBTSet(bt,e_loc[i]);CHKERRQ(ierr); }
851 
852   /* filter out (set to -1) the global indices not used by the local elements */
853   ierr = PetscMalloc1(nv/dof,&gidx);CHKERRQ(ierr);
854   ierr = ISLocalToGlobalMappingGetBlockIndices(ltog,&idx);CHKERRQ(ierr);
855   ierr = PetscArraycpy(gidx,idx,nv/dof);CHKERRQ(ierr);
856   ierr = ISLocalToGlobalMappingRestoreBlockIndices(ltog,&idx);CHKERRQ(ierr);
857   for (i=0;i<nv/dof;i++) if (!PetscBTLookup(bt,i)) gidx[i] = -1;
858   ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
859   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nv/dof,gidx,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
860   ierr = ISLocalToGlobalMappingCreateIS(is,&ltog);CHKERRQ(ierr);
861   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
862   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
863   ierr = ISDestroy(&is);CHKERRQ(ierr);
864 
865   /* Preallocation */
866   if (dm->prealloc_skip) {
867     ierr = MatSetUp(J);CHKERRQ(ierr);
868   } else {
869     ierr = MatISGetLocalMat(J,&lJ);CHKERRQ(ierr);
870     ierr = MatGetLocalToGlobalMapping(lJ,&ltog,NULL);CHKERRQ(ierr);
871     ierr = MatCreate(PetscObjectComm((PetscObject)lJ),&P);CHKERRQ(ierr);
872     ierr = MatSetType(P,MATPREALLOCATOR);CHKERRQ(ierr);
873     ierr = MatSetLocalToGlobalMapping(P,ltog,ltog);CHKERRQ(ierr);
874     ierr = MatGetSize(lJ,&N,NULL);CHKERRQ(ierr);
875     ierr = MatGetLocalSize(lJ,&n,NULL);CHKERRQ(ierr);
876     ierr = MatSetSizes(P,n,n,N,N);CHKERRQ(ierr);
877     ierr = MatSetBlockSize(P,dof);CHKERRQ(ierr);
878     ierr = MatSetUp(P);CHKERRQ(ierr);
879     for (i=0;i<nel;i++) {
880       ierr = MatSetValuesBlockedLocal(P,nen,e_loc+i*nen,nen,e_loc+i*nen,NULL,INSERT_VALUES);CHKERRQ(ierr);
881     }
882     ierr = MatPreallocatorPreallocate(P,(PetscBool)!da->prealloc_only,lJ);CHKERRQ(ierr);
883     ierr = MatISRestoreLocalMat(J,&lJ);CHKERRQ(ierr);
884     ierr = DMDARestoreElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr);
885     ierr = MatDestroy(&P);CHKERRQ(ierr);
886 
887     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
888     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
889   }
890   PetscFunctionReturn(0);
891 }
892 
893 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
894 {
895   PetscErrorCode         ierr;
896   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
897   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
898   MPI_Comm               comm;
899   PetscScalar            *values;
900   DMBoundaryType         bx,by;
901   ISLocalToGlobalMapping ltog;
902   DMDAStencilType        st;
903 
904   PetscFunctionBegin;
905   /*
906          nc - number of components per grid point
907          col - number of colors needed in one direction for single component problem
908 
909   */
910   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
911   col  = 2*s + 1;
912   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
913   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
914   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
915 
916   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
917   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
918 
919   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
920   /* determine the matrix preallocation information */
921   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
922   for (i=xs; i<xs+nx; i++) {
923 
924     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
925     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
926 
927     for (j=ys; j<ys+ny; j++) {
928       slot = i - gxs + gnx*(j - gys);
929 
930       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
931       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
932 
933       cnt = 0;
934       for (k=0; k<nc; k++) {
935         for (l=lstart; l<lend+1; l++) {
936           for (p=pstart; p<pend+1; p++) {
937             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
938               cols[cnt++] = k + nc*(slot + gnx*l + p);
939             }
940           }
941         }
942         rows[k] = k + nc*(slot);
943       }
944       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
945     }
946   }
947   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
948   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
949   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
950   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
951 
952   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
953 
954   /*
955     For each node in the grid: we get the neighbors in the local (on processor ordering
956     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
957     PETSc ordering.
958   */
959   if (!da->prealloc_only) {
960     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
961     for (i=xs; i<xs+nx; i++) {
962 
963       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
964       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
965 
966       for (j=ys; j<ys+ny; j++) {
967         slot = i - gxs + gnx*(j - gys);
968 
969         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
970         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
971 
972         cnt = 0;
973         for (k=0; k<nc; k++) {
974           for (l=lstart; l<lend+1; l++) {
975             for (p=pstart; p<pend+1; p++) {
976               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
977                 cols[cnt++] = k + nc*(slot + gnx*l + p);
978               }
979             }
980           }
981           rows[k] = k + nc*(slot);
982         }
983         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
984       }
985     }
986     ierr = PetscFree(values);CHKERRQ(ierr);
987     /* do not copy values to GPU since they are all zero and not yet needed there */
988     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
989     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
990     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
991     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
992     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
993   }
994   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
995   PetscFunctionReturn(0);
996 }
997 
998 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
999 {
1000   PetscErrorCode         ierr;
1001   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1002   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1003   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1004   MPI_Comm               comm;
1005   PetscScalar            *values;
1006   DMBoundaryType         bx,by,bz;
1007   ISLocalToGlobalMapping ltog;
1008   DMDAStencilType        st;
1009 
1010   PetscFunctionBegin;
1011   /*
1012          nc - number of components per grid point
1013          col - number of colors needed in one direction for single component problem
1014 
1015   */
1016   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1017   col  = 2*s + 1;
1018   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1019   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1020   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1021 
1022   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1023   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1024 
1025   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1026   /* determine the matrix preallocation information */
1027   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1028   for (i=xs; i<xs+nx; i++) {
1029     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1030     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1031     for (j=ys; j<ys+ny; j++) {
1032       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1033       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1034       for (k=zs; k<zs+nz; k++) {
1035         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1036         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1037 
1038         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1039 
1040         cnt = 0;
1041         for (l=0; l<nc; l++) {
1042           for (ii=istart; ii<iend+1; ii++) {
1043             for (jj=jstart; jj<jend+1; jj++) {
1044               for (kk=kstart; kk<kend+1; kk++) {
1045                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1046                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1047                 }
1048               }
1049             }
1050           }
1051           rows[l] = l + nc*(slot);
1052         }
1053         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1054       }
1055     }
1056   }
1057   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1058   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1059   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1060   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1061   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1062 
1063   /*
1064     For each node in the grid: we get the neighbors in the local (on processor ordering
1065     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1066     PETSc ordering.
1067   */
1068   if (!da->prealloc_only) {
1069     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1070     for (i=xs; i<xs+nx; i++) {
1071       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1072       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1073       for (j=ys; j<ys+ny; j++) {
1074         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1075         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1076         for (k=zs; k<zs+nz; k++) {
1077           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1078           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1079 
1080           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1081 
1082           cnt = 0;
1083           for (l=0; l<nc; l++) {
1084             for (ii=istart; ii<iend+1; ii++) {
1085               for (jj=jstart; jj<jend+1; jj++) {
1086                 for (kk=kstart; kk<kend+1; kk++) {
1087                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1088                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1089                   }
1090                 }
1091               }
1092             }
1093             rows[l] = l + nc*(slot);
1094           }
1095           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1096         }
1097       }
1098     }
1099     ierr = PetscFree(values);CHKERRQ(ierr);
1100     /* do not copy values to GPU since they are all zero and not yet needed there */
1101     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1102     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1103     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1104     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1105     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1106   }
1107   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1108   PetscFunctionReturn(0);
1109 }
1110 
1111 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
1112 {
1113   PetscErrorCode         ierr;
1114   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
1115   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1116   MPI_Comm               comm;
1117   DMBoundaryType         bx,by;
1118   ISLocalToGlobalMapping ltog,mltog;
1119   DMDAStencilType        st;
1120   PetscBool              removedups = PETSC_FALSE,alreadyboundtocpu = PETSC_TRUE;
1121 
1122   PetscFunctionBegin;
1123   /*
1124          nc - number of components per grid point
1125          col - number of colors needed in one direction for single component problem
1126 
1127   */
1128   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
1129   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1130     ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
1131   }
1132   col  = 2*s + 1;
1133   /*
1134        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1135        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1136   */
1137   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1138   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1139   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
1140   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
1141   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1142 
1143   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
1144   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1145 
1146   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1147   /* determine the matrix preallocation information */
1148   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1149   for (i=xs; i<xs+nx; i++) {
1150     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1151     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1152 
1153     for (j=ys; j<ys+ny; j++) {
1154       slot = i - gxs + gnx*(j - gys);
1155 
1156       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1157       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1158 
1159       cnt = 0;
1160       for (k=0; k<nc; k++) {
1161         for (l=lstart; l<lend+1; l++) {
1162           for (p=pstart; p<pend+1; p++) {
1163             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1164               cols[cnt++] = k + nc*(slot + gnx*l + p);
1165             }
1166           }
1167         }
1168         rows[k] = k + nc*(slot);
1169       }
1170       if (removedups) {
1171         ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1172       } else {
1173         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1174       }
1175     }
1176   }
1177   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1178   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1179   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1180   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1181   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1182   if (!mltog) {
1183     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1184   }
1185 
1186   /*
1187     For each node in the grid: we get the neighbors in the local (on processor ordering
1188     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1189     PETSc ordering.
1190   */
1191   if (!da->prealloc_only) {
1192     for (i=xs; i<xs+nx; i++) {
1193 
1194       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1195       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1196 
1197       for (j=ys; j<ys+ny; j++) {
1198         slot = i - gxs + gnx*(j - gys);
1199 
1200         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1201         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1202 
1203         cnt = 0;
1204         for (l=lstart; l<lend+1; l++) {
1205           for (p=pstart; p<pend+1; p++) {
1206             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1207               cols[cnt++] = nc*(slot + gnx*l + p);
1208               for (k=1; k<nc; k++) {
1209                 cols[cnt] = 1 + cols[cnt-1];cnt++;
1210               }
1211             }
1212           }
1213         }
1214         for (k=0; k<nc; k++) rows[k] = k + nc*(slot);
1215         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1216       }
1217     }
1218     /* do not copy values to GPU since they are all zero and not yet needed there */
1219     ierr = MatBoundToCPU(J,&alreadyboundtocpu);CHKERRQ(ierr);
1220     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1221     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1222     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1223     if (!alreadyboundtocpu) {ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);}
1224     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1225     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1226       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1227     }
1228   }
1229   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1230   PetscFunctionReturn(0);
1231 }
1232 
1233 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
1234 {
1235   PetscErrorCode         ierr;
1236   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1237   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
1238   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1239   DM_DA                  *dd = (DM_DA*)da->data;
1240   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
1241   MPI_Comm               comm;
1242   DMBoundaryType         bx,by;
1243   ISLocalToGlobalMapping ltog;
1244   DMDAStencilType        st;
1245   PetscBool              removedups = PETSC_FALSE;
1246 
1247   PetscFunctionBegin;
1248   /*
1249          nc - number of components per grid point
1250          col - number of colors needed in one direction for single component problem
1251 
1252   */
1253   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
1254   col  = 2*s + 1;
1255   /*
1256        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1257        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1258   */
1259   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1260   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1261   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
1262   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
1263   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1264 
1265   ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr);
1266   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1267 
1268   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1269   /* determine the matrix preallocation information */
1270   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1271   for (i=xs; i<xs+nx; i++) {
1272 
1273     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1274     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1275 
1276     for (j=ys; j<ys+ny; j++) {
1277       slot = i - gxs + gnx*(j - gys);
1278 
1279       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1280       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1281 
1282       for (k=0; k<nc; k++) {
1283         cnt = 0;
1284         for (l=lstart; l<lend+1; l++) {
1285           for (p=pstart; p<pend+1; p++) {
1286             if (l || p) {
1287               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1288                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1289               }
1290             } else {
1291               if (dfill) {
1292                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1293               } else {
1294                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1295               }
1296             }
1297           }
1298         }
1299         row    = k + nc*(slot);
1300         maxcnt = PetscMax(maxcnt,cnt);
1301         if (removedups) {
1302           ierr   = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1303         } else {
1304           ierr   = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1305         }
1306       }
1307     }
1308   }
1309   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1310   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1311   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1312   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1313 
1314   /*
1315     For each node in the grid: we get the neighbors in the local (on processor ordering
1316     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1317     PETSc ordering.
1318   */
1319   if (!da->prealloc_only) {
1320     for (i=xs; i<xs+nx; i++) {
1321 
1322       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1323       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1324 
1325       for (j=ys; j<ys+ny; j++) {
1326         slot = i - gxs + gnx*(j - gys);
1327 
1328         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1329         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1330 
1331         for (k=0; k<nc; k++) {
1332           cnt = 0;
1333           for (l=lstart; l<lend+1; l++) {
1334             for (p=pstart; p<pend+1; p++) {
1335               if (l || p) {
1336                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1337                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1338                 }
1339               } else {
1340                 if (dfill) {
1341                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1342                 } else {
1343                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1344                 }
1345               }
1346             }
1347           }
1348           row  = k + nc*(slot);
1349           ierr = MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1350         }
1351       }
1352     }
1353     /* do not copy values to GPU since they are all zero and not yet needed there */
1354     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1355     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1356     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1357     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1358     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1359   }
1360   ierr = PetscFree(cols);CHKERRQ(ierr);
1361   PetscFunctionReturn(0);
1362 }
1363 
1364 /* ---------------------------------------------------------------------------------*/
1365 
1366 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
1367 {
1368   PetscErrorCode         ierr;
1369   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1370   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1371   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1372   MPI_Comm               comm;
1373   DMBoundaryType         bx,by,bz;
1374   ISLocalToGlobalMapping ltog,mltog;
1375   DMDAStencilType        st;
1376   PetscBool              removedups = PETSC_FALSE;
1377 
1378   PetscFunctionBegin;
1379   /*
1380          nc - number of components per grid point
1381          col - number of colors needed in one direction for single component problem
1382 
1383   */
1384   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1385   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1386     ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
1387   }
1388   col  = 2*s + 1;
1389 
1390   /*
1391        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1392        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1393   */
1394   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1395   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1396   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
1397 
1398   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1399   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1400   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1401 
1402   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1403   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1404 
1405   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1406   /* determine the matrix preallocation information */
1407   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1408   for (i=xs; i<xs+nx; i++) {
1409     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1410     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1411     for (j=ys; j<ys+ny; j++) {
1412       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1413       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1414       for (k=zs; k<zs+nz; k++) {
1415         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1416         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1417 
1418         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1419 
1420         cnt = 0;
1421         for (l=0; l<nc; l++) {
1422           for (ii=istart; ii<iend+1; ii++) {
1423             for (jj=jstart; jj<jend+1; jj++) {
1424               for (kk=kstart; kk<kend+1; kk++) {
1425                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1426                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1427                 }
1428               }
1429             }
1430           }
1431           rows[l] = l + nc*(slot);
1432         }
1433         if (removedups) {
1434           ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1435         } else {
1436           ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1437         }
1438       }
1439     }
1440   }
1441   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1442   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1443   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1444   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1445   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1446   if (!mltog) {
1447     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1448   }
1449 
1450   /*
1451     For each node in the grid: we get the neighbors in the local (on processor ordering
1452     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1453     PETSc ordering.
1454   */
1455   if (!da->prealloc_only) {
1456     for (i=xs; i<xs+nx; i++) {
1457       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1458       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1459       for (j=ys; j<ys+ny; j++) {
1460         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1461         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1462         for (k=zs; k<zs+nz; k++) {
1463           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1464           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1465 
1466           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1467 
1468           cnt = 0;
1469           for (kk=kstart; kk<kend+1; kk++) {
1470             for (jj=jstart; jj<jend+1; jj++) {
1471               for (ii=istart; ii<iend+1; ii++) {
1472                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1473                   cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk);
1474                     for (l=1; l<nc; l++) {
1475                       cols[cnt] = 1 + cols[cnt-1];cnt++;
1476                   }
1477                 }
1478               }
1479             }
1480           }
1481           rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1482           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1483         }
1484       }
1485     }
1486     /* do not copy values to GPU since they are all zero and not yet needed there */
1487     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1488     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1489     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1490     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1491       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1492     }
1493     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1494     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1495   }
1496   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1497   PetscFunctionReturn(0);
1498 }
1499 
1500 /* ---------------------------------------------------------------------------------*/
1501 
1502 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1503 {
1504   PetscErrorCode         ierr;
1505   DM_DA                  *dd = (DM_DA*)da->data;
1506   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1507   PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
1508   PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
1509   DMBoundaryType         bx;
1510   ISLocalToGlobalMapping ltog;
1511   PetscMPIInt            rank,size;
1512 
1513   PetscFunctionBegin;
1514   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRMPI(ierr);
1515   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRMPI(ierr);
1516 
1517   /*
1518          nc - number of components per grid point
1519 
1520   */
1521   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
1522   PetscCheckFalse(s > 1,PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1523   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
1524   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
1525 
1526   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1527   ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr);
1528 
1529   /*
1530         note should be smaller for first and last process with no periodic
1531         does not handle dfill
1532   */
1533   cnt = 0;
1534   /* coupling with process to the left */
1535   for (i=0; i<s; i++) {
1536     for (j=0; j<nc; j++) {
1537       ocols[cnt] = ((rank == 0) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1538       cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1539       if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1540         if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1541         else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1542       }
1543       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1544       cnt++;
1545     }
1546   }
1547   for (i=s; i<nx-s; i++) {
1548     for (j=0; j<nc; j++) {
1549       cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1550       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1551       cnt++;
1552     }
1553   }
1554   /* coupling with process to the right */
1555   for (i=nx-s; i<nx; i++) {
1556     for (j=0; j<nc; j++) {
1557       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1558       cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1559       if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1560         if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1561         else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1562       }
1563       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1564       cnt++;
1565     }
1566   }
1567 
1568   ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
1569   ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
1570   ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);
1571 
1572   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1573   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1574 
1575   /*
1576     For each node in the grid: we get the neighbors in the local (on processor ordering
1577     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1578     PETSc ordering.
1579   */
1580   if (!da->prealloc_only) {
1581     ierr = PetscMalloc1(maxcnt,&cols);CHKERRQ(ierr);
1582     row = xs*nc;
1583     /* coupling with process to the left */
1584     for (i=xs; i<xs+s; i++) {
1585       for (j=0; j<nc; j++) {
1586         cnt = 0;
1587         if (rank) {
1588           for (l=0; l<s; l++) {
1589             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1590           }
1591         }
1592         if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1593           for (l=0; l<s; l++) {
1594             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k];
1595           }
1596         }
1597         if (dfill) {
1598           for (k=dfill[j]; k<dfill[j+1]; k++) {
1599             cols[cnt++] = i*nc + dfill[k];
1600           }
1601         } else {
1602           for (k=0; k<nc; k++) {
1603             cols[cnt++] = i*nc + k;
1604           }
1605         }
1606         for (l=0; l<s; l++) {
1607           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1608         }
1609         ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1610         row++;
1611       }
1612     }
1613     for (i=xs+s; i<xs+nx-s; i++) {
1614       for (j=0; j<nc; j++) {
1615         cnt = 0;
1616         for (l=0; l<s; l++) {
1617           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1618         }
1619         if (dfill) {
1620           for (k=dfill[j]; k<dfill[j+1]; k++) {
1621             cols[cnt++] = i*nc + dfill[k];
1622           }
1623         } else {
1624           for (k=0; k<nc; k++) {
1625             cols[cnt++] = i*nc + k;
1626           }
1627         }
1628         for (l=0; l<s; l++) {
1629           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1630         }
1631         ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1632         row++;
1633       }
1634     }
1635     /* coupling with process to the right */
1636     for (i=xs+nx-s; i<xs+nx; i++) {
1637       for (j=0; j<nc; j++) {
1638         cnt = 0;
1639         for (l=0; l<s; l++) {
1640           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1641         }
1642         if (dfill) {
1643           for (k=dfill[j]; k<dfill[j+1]; k++) {
1644             cols[cnt++] = i*nc + dfill[k];
1645           }
1646         } else {
1647           for (k=0; k<nc; k++) {
1648             cols[cnt++] = i*nc + k;
1649           }
1650         }
1651         if (rank < size-1) {
1652           for (l=0; l<s; l++) {
1653             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1654           }
1655         }
1656         if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1657           for (l=0; l<s; l++) {
1658             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k];
1659           }
1660         }
1661         ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1662         row++;
1663       }
1664     }
1665     ierr = PetscFree(cols);CHKERRQ(ierr);
1666     /* do not copy values to GPU since they are all zero and not yet needed there */
1667     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1668     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1669     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1670     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1671     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1672   }
1673   PetscFunctionReturn(0);
1674 }
1675 
1676 /* ---------------------------------------------------------------------------------*/
1677 
1678 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1679 {
1680   PetscErrorCode         ierr;
1681   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1682   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1683   PetscInt               istart,iend;
1684   DMBoundaryType         bx;
1685   ISLocalToGlobalMapping ltog,mltog;
1686 
1687   PetscFunctionBegin;
1688   /*
1689          nc - number of components per grid point
1690          col - number of colors needed in one direction for single component problem
1691 
1692   */
1693   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
1694   if (bx == DM_BOUNDARY_NONE) {
1695     ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
1696   }
1697   col  = 2*s + 1;
1698 
1699   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
1700   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
1701 
1702   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1703   ierr = MatSeqAIJSetPreallocation(J,col*nc,NULL);CHKERRQ(ierr);
1704   ierr = MatMPIAIJSetPreallocation(J,col*nc,NULL,col*nc,NULL);CHKERRQ(ierr);
1705 
1706   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1707   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1708   if (!mltog) {
1709     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1710   }
1711 
1712   /*
1713     For each node in the grid: we get the neighbors in the local (on processor ordering
1714     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1715     PETSc ordering.
1716   */
1717   if (!da->prealloc_only) {
1718     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1719     for (i=xs; i<xs+nx; i++) {
1720       istart = PetscMax(-s,gxs - i);
1721       iend   = PetscMin(s,gxs + gnx - i - 1);
1722       slot   = i - gxs;
1723 
1724       cnt = 0;
1725       for (i1=istart; i1<iend+1; i1++) {
1726         cols[cnt++] = nc*(slot + i1);
1727         for (l=1; l<nc; l++) {
1728           cols[cnt] = 1 + cols[cnt-1];cnt++;
1729         }
1730       }
1731       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1732       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1733     }
1734     /* do not copy values to GPU since they are all zero and not yet needed there */
1735     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1736     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1737     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1738     if (bx == DM_BOUNDARY_NONE) {
1739       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1740     }
1741     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1742     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1743     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1744   }
1745   PetscFunctionReturn(0);
1746 }
1747 
1748 /* ---------------------------------------------------------------------------------*/
1749 
1750 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da,Mat J)
1751 {
1752   PetscErrorCode         ierr;
1753   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1754   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1755   PetscInt               istart,iend;
1756   DMBoundaryType         bx;
1757   ISLocalToGlobalMapping ltog,mltog;
1758 
1759   PetscFunctionBegin;
1760   /*
1761          nc - number of components per grid point
1762          col - number of colors needed in one direction for single component problem
1763   */
1764   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
1765   col  = 2*s + 1;
1766 
1767   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
1768   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
1769 
1770   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1771   ierr = MatSeqAIJSetTotalPreallocation(J,nx*nc*col*nc);CHKERRQ(ierr);
1772 
1773   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1774   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1775   if (!mltog) {
1776     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1777   }
1778 
1779   /*
1780     For each node in the grid: we get the neighbors in the local (on processor ordering
1781     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1782     PETSc ordering.
1783   */
1784   if (!da->prealloc_only) {
1785     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1786     for (i=xs; i<xs+nx; i++) {
1787       istart = PetscMax(-s,gxs - i);
1788       iend   = PetscMin(s,gxs + gnx - i - 1);
1789       slot   = i - gxs;
1790 
1791       cnt = 0;
1792       for (i1=istart; i1<iend+1; i1++) {
1793         cols[cnt++] = nc*(slot + i1);
1794         for (l=1; l<nc; l++) {
1795           cols[cnt] = 1 + cols[cnt-1];cnt++;
1796         }
1797       }
1798       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1799       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1800     }
1801     /* do not copy values to GPU since they are all zero and not yet needed there */
1802     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1803     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1804     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1805     if (bx == DM_BOUNDARY_NONE) {
1806       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1807     }
1808     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1809     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1810     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1811   }
1812   ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1813   PetscFunctionReturn(0);
1814 }
1815 
1816 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1817 {
1818   PetscErrorCode         ierr;
1819   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1820   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1821   PetscInt               istart,iend,jstart,jend,ii,jj;
1822   MPI_Comm               comm;
1823   PetscScalar            *values;
1824   DMBoundaryType         bx,by;
1825   DMDAStencilType        st;
1826   ISLocalToGlobalMapping ltog;
1827 
1828   PetscFunctionBegin;
1829   /*
1830      nc - number of components per grid point
1831      col - number of colors needed in one direction for single component problem
1832   */
1833   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
1834   col  = 2*s + 1;
1835 
1836   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
1837   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
1838   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1839 
1840   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
1841 
1842   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1843 
1844   /* determine the matrix preallocation information */
1845   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1846   for (i=xs; i<xs+nx; i++) {
1847     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1848     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1849     for (j=ys; j<ys+ny; j++) {
1850       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1851       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1852       slot   = i - gxs + gnx*(j - gys);
1853 
1854       /* Find block columns in block row */
1855       cnt = 0;
1856       for (ii=istart; ii<iend+1; ii++) {
1857         for (jj=jstart; jj<jend+1; jj++) {
1858           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1859             cols[cnt++] = slot + ii + gnx*jj;
1860           }
1861         }
1862       }
1863       ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1864     }
1865   }
1866   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1867   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1868   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1869 
1870   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1871 
1872   /*
1873     For each node in the grid: we get the neighbors in the local (on processor ordering
1874     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1875     PETSc ordering.
1876   */
1877   if (!da->prealloc_only) {
1878     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1879     for (i=xs; i<xs+nx; i++) {
1880       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1881       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1882       for (j=ys; j<ys+ny; j++) {
1883         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1884         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1885         slot = i - gxs + gnx*(j - gys);
1886         cnt  = 0;
1887         for (ii=istart; ii<iend+1; ii++) {
1888           for (jj=jstart; jj<jend+1; jj++) {
1889             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1890               cols[cnt++] = slot + ii + gnx*jj;
1891             }
1892           }
1893         }
1894         ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1895       }
1896     }
1897     ierr = PetscFree(values);CHKERRQ(ierr);
1898     /* do not copy values to GPU since they are all zero and not yet needed there */
1899     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1900     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1901     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1902     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1903     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1904   }
1905   ierr = PetscFree(cols);CHKERRQ(ierr);
1906   PetscFunctionReturn(0);
1907 }
1908 
1909 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1910 {
1911   PetscErrorCode         ierr;
1912   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1913   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1914   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1915   MPI_Comm               comm;
1916   PetscScalar            *values;
1917   DMBoundaryType         bx,by,bz;
1918   DMDAStencilType        st;
1919   ISLocalToGlobalMapping ltog;
1920 
1921   PetscFunctionBegin;
1922   /*
1923          nc - number of components per grid point
1924          col - number of colors needed in one direction for single component problem
1925 
1926   */
1927   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1928   col  = 2*s + 1;
1929 
1930   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1931   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1932   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1933 
1934   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
1935 
1936   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1937 
1938   /* determine the matrix preallocation information */
1939   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1940   for (i=xs; i<xs+nx; i++) {
1941     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1942     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1943     for (j=ys; j<ys+ny; j++) {
1944       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1945       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1946       for (k=zs; k<zs+nz; k++) {
1947         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1948         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1949 
1950         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1951 
1952         /* Find block columns in block row */
1953         cnt = 0;
1954         for (ii=istart; ii<iend+1; ii++) {
1955           for (jj=jstart; jj<jend+1; jj++) {
1956             for (kk=kstart; kk<kend+1; kk++) {
1957               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1958                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1959               }
1960             }
1961           }
1962         }
1963         ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1964       }
1965     }
1966   }
1967   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1968   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1969   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1970 
1971   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1972 
1973   /*
1974     For each node in the grid: we get the neighbors in the local (on processor ordering
1975     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1976     PETSc ordering.
1977   */
1978   if (!da->prealloc_only) {
1979     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
1980     for (i=xs; i<xs+nx; i++) {
1981       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1982       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1983       for (j=ys; j<ys+ny; j++) {
1984         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1985         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1986         for (k=zs; k<zs+nz; k++) {
1987           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1988           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1989 
1990           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1991 
1992           cnt = 0;
1993           for (ii=istart; ii<iend+1; ii++) {
1994             for (jj=jstart; jj<jend+1; jj++) {
1995               for (kk=kstart; kk<kend+1; kk++) {
1996                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1997                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1998                 }
1999               }
2000             }
2001           }
2002           ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2003         }
2004       }
2005     }
2006     ierr = PetscFree(values);CHKERRQ(ierr);
2007     /* do not copy values to GPU since they are all zero and not yet needed there */
2008     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2009     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2010     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2011     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2012     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2013   }
2014   ierr = PetscFree(cols);CHKERRQ(ierr);
2015   PetscFunctionReturn(0);
2016 }
2017 
2018 /*
2019   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
2020   identify in the local ordering with periodic domain.
2021 */
2022 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
2023 {
2024   PetscErrorCode ierr;
2025   PetscInt       i,n;
2026 
2027   PetscFunctionBegin;
2028   ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr);
2029   ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr);
2030   for (i=0,n=0; i<*cnt; i++) {
2031     if (col[i] >= *row) col[n++] = col[i];
2032   }
2033   *cnt = n;
2034   PetscFunctionReturn(0);
2035 }
2036 
2037 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
2038 {
2039   PetscErrorCode         ierr;
2040   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2041   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
2042   PetscInt               istart,iend,jstart,jend,ii,jj;
2043   MPI_Comm               comm;
2044   PetscScalar            *values;
2045   DMBoundaryType         bx,by;
2046   DMDAStencilType        st;
2047   ISLocalToGlobalMapping ltog;
2048 
2049   PetscFunctionBegin;
2050   /*
2051      nc - number of components per grid point
2052      col - number of colors needed in one direction for single component problem
2053   */
2054   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
2055   col  = 2*s + 1;
2056 
2057   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
2058   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
2059   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2060 
2061   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
2062 
2063   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2064 
2065   /* determine the matrix preallocation information */
2066   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
2067   for (i=xs; i<xs+nx; i++) {
2068     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2069     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2070     for (j=ys; j<ys+ny; j++) {
2071       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2072       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2073       slot   = i - gxs + gnx*(j - gys);
2074 
2075       /* Find block columns in block row */
2076       cnt = 0;
2077       for (ii=istart; ii<iend+1; ii++) {
2078         for (jj=jstart; jj<jend+1; jj++) {
2079           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2080             cols[cnt++] = slot + ii + gnx*jj;
2081           }
2082         }
2083       }
2084       ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2085       ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2086     }
2087   }
2088   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2089   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2090   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2091 
2092   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2093 
2094   /*
2095     For each node in the grid: we get the neighbors in the local (on processor ordering
2096     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2097     PETSc ordering.
2098   */
2099   if (!da->prealloc_only) {
2100     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
2101     for (i=xs; i<xs+nx; i++) {
2102       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2103       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2104       for (j=ys; j<ys+ny; j++) {
2105         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2106         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2107         slot   = i - gxs + gnx*(j - gys);
2108 
2109         /* Find block columns in block row */
2110         cnt = 0;
2111         for (ii=istart; ii<iend+1; ii++) {
2112           for (jj=jstart; jj<jend+1; jj++) {
2113             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2114               cols[cnt++] = slot + ii + gnx*jj;
2115             }
2116           }
2117         }
2118         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2119         ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2120       }
2121     }
2122     ierr = PetscFree(values);CHKERRQ(ierr);
2123     /* do not copy values to GPU since they are all zero and not yet needed there */
2124     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2125     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2126     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2127     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2128     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2129   }
2130   ierr = PetscFree(cols);CHKERRQ(ierr);
2131   PetscFunctionReturn(0);
2132 }
2133 
2134 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
2135 {
2136   PetscErrorCode         ierr;
2137   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2138   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
2139   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
2140   MPI_Comm               comm;
2141   PetscScalar            *values;
2142   DMBoundaryType         bx,by,bz;
2143   DMDAStencilType        st;
2144   ISLocalToGlobalMapping ltog;
2145 
2146   PetscFunctionBegin;
2147   /*
2148      nc - number of components per grid point
2149      col - number of colors needed in one direction for single component problem
2150   */
2151   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2152   col  = 2*s + 1;
2153 
2154   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2155   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2156   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2157 
2158   /* create the matrix */
2159   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
2160 
2161   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2162 
2163   /* determine the matrix preallocation information */
2164   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2165   for (i=xs; i<xs+nx; i++) {
2166     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2167     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2168     for (j=ys; j<ys+ny; j++) {
2169       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2170       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2171       for (k=zs; k<zs+nz; k++) {
2172         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2173         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2174 
2175         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2176 
2177         /* Find block columns in block row */
2178         cnt = 0;
2179         for (ii=istart; ii<iend+1; ii++) {
2180           for (jj=jstart; jj<jend+1; jj++) {
2181             for (kk=kstart; kk<kend+1; kk++) {
2182               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2183                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2184               }
2185             }
2186           }
2187         }
2188         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2189         ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2190       }
2191     }
2192   }
2193   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2194   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2195   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2196 
2197   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2198 
2199   /*
2200     For each node in the grid: we get the neighbors in the local (on processor ordering
2201     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2202     PETSc ordering.
2203   */
2204   if (!da->prealloc_only) {
2205     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
2206     for (i=xs; i<xs+nx; i++) {
2207       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2208       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2209       for (j=ys; j<ys+ny; j++) {
2210         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2211         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2212         for (k=zs; k<zs+nz; k++) {
2213           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2214           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2215 
2216           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2217 
2218           cnt = 0;
2219           for (ii=istart; ii<iend+1; ii++) {
2220             for (jj=jstart; jj<jend+1; jj++) {
2221               for (kk=kstart; kk<kend+1; kk++) {
2222                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2223                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2224                 }
2225               }
2226             }
2227           }
2228           ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2229           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2230         }
2231       }
2232     }
2233     ierr = PetscFree(values);CHKERRQ(ierr);
2234     /* do not copy values to GPU since they are all zero and not yet needed there */
2235     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2236     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2237     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2238     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2239     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2240   }
2241   ierr = PetscFree(cols);CHKERRQ(ierr);
2242   PetscFunctionReturn(0);
2243 }
2244 
2245 /* ---------------------------------------------------------------------------------*/
2246 
2247 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
2248 {
2249   PetscErrorCode         ierr;
2250   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2251   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
2252   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
2253   DM_DA                  *dd = (DM_DA*)da->data;
2254   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
2255   MPI_Comm               comm;
2256   PetscScalar            *values;
2257   DMBoundaryType         bx,by,bz;
2258   ISLocalToGlobalMapping ltog;
2259   DMDAStencilType        st;
2260   PetscBool              removedups = PETSC_FALSE;
2261 
2262   PetscFunctionBegin;
2263   /*
2264          nc - number of components per grid point
2265          col - number of colors needed in one direction for single component problem
2266 
2267   */
2268   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2269   col  = 2*s + 1;
2270   PetscCheckFalse(bx == DM_BOUNDARY_PERIODIC && (m % col),PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
2271                  by 2*stencil_width + 1\n");
2272   PetscCheckFalse(by == DM_BOUNDARY_PERIODIC && (n % col),PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
2273                  by 2*stencil_width + 1\n");
2274   PetscCheckFalse(bz == DM_BOUNDARY_PERIODIC && (p % col),PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
2275                  by 2*stencil_width + 1\n");
2276 
2277   /*
2278        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2279        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2280   */
2281   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
2282   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
2283   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
2284 
2285   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2286   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2287   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2288 
2289   ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr);
2290   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2291 
2292   /* determine the matrix preallocation information */
2293   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2294 
2295   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
2296   for (i=xs; i<xs+nx; i++) {
2297     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2298     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2299     for (j=ys; j<ys+ny; j++) {
2300       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2301       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2302       for (k=zs; k<zs+nz; k++) {
2303         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2304         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2305 
2306         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2307 
2308         for (l=0; l<nc; l++) {
2309           cnt = 0;
2310           for (ii=istart; ii<iend+1; ii++) {
2311             for (jj=jstart; jj<jend+1; jj++) {
2312               for (kk=kstart; kk<kend+1; kk++) {
2313                 if (ii || jj || kk) {
2314                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2315                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2316                   }
2317                 } else {
2318                   if (dfill) {
2319                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2320                   } else {
2321                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2322                   }
2323                 }
2324               }
2325             }
2326           }
2327           row  = l + nc*(slot);
2328           maxcnt = PetscMax(maxcnt,cnt);
2329           if (removedups) {
2330             ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2331           } else {
2332             ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2333           }
2334         }
2335       }
2336     }
2337   }
2338   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
2339   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
2340   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2341   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2342 
2343   /*
2344     For each node in the grid: we get the neighbors in the local (on processor ordering
2345     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2346     PETSc ordering.
2347   */
2348   if (!da->prealloc_only) {
2349     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
2350     for (i=xs; i<xs+nx; i++) {
2351       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2352       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2353       for (j=ys; j<ys+ny; j++) {
2354         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2355         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2356         for (k=zs; k<zs+nz; k++) {
2357           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2358           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2359 
2360           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2361 
2362           for (l=0; l<nc; l++) {
2363             cnt = 0;
2364             for (ii=istart; ii<iend+1; ii++) {
2365               for (jj=jstart; jj<jend+1; jj++) {
2366                 for (kk=kstart; kk<kend+1; kk++) {
2367                   if (ii || jj || kk) {
2368                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2369                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2370                     }
2371                   } else {
2372                     if (dfill) {
2373                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2374                     } else {
2375                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2376                     }
2377                   }
2378                 }
2379               }
2380             }
2381             row  = l + nc*(slot);
2382             ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2383           }
2384         }
2385       }
2386     }
2387     ierr = PetscFree(values);CHKERRQ(ierr);
2388     /* do not copy values to GPU since they are all zero and not yet needed there */
2389     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2390     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2391     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2392     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2393     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2394   }
2395   ierr = PetscFree(cols);CHKERRQ(ierr);
2396   PetscFunctionReturn(0);
2397 }
2398