xref: /petsc/src/dm/impls/da/fdda.c (revision 91e63d38360eb9bc922f79d792328cc4769c01ac)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 #include <petscbt.h>
5 
6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);
10 
11 /*
12    For ghost i that may be negative or greater than the upper bound this
13   maps it into the 0:m-1 range using periodicity
14 */
15 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))
16 
17 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
18 {
19   PetscErrorCode ierr;
20   PetscInt       i,j,nz,*fill;
21 
22   PetscFunctionBegin;
23   if (!dfill) PetscFunctionReturn(0);
24 
25   /* count number nonzeros */
26   nz = 0;
27   for (i=0; i<w; i++) {
28     for (j=0; j<w; j++) {
29       if (dfill[w*i+j]) nz++;
30     }
31   }
32   ierr = PetscMalloc1(nz + w + 1,&fill);CHKERRQ(ierr);
33   /* construct modified CSR storage of nonzero structure */
34   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
35    so fill[1] - fill[0] gives number of nonzeros in first row etc */
36   nz = w + 1;
37   for (i=0; i<w; i++) {
38     fill[i] = nz;
39     for (j=0; j<w; j++) {
40       if (dfill[w*i+j]) {
41         fill[nz] = j;
42         nz++;
43       }
44     }
45   }
46   fill[w] = nz;
47 
48   *rfill = fill;
49   PetscFunctionReturn(0);
50 }
51 
52 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill)
53 {
54   PetscErrorCode ierr;
55   PetscInt       nz;
56 
57   PetscFunctionBegin;
58   if (!dfillsparse) PetscFunctionReturn(0);
59 
60   /* Determine number of non-zeros */
61   nz = (dfillsparse[w] - w - 1);
62 
63   /* Allocate space for our copy of the given sparse matrix representation. */
64   ierr = PetscMalloc1(nz + w + 1,rfill);CHKERRQ(ierr);
65   ierr = PetscArraycpy(*rfill,dfillsparse,nz+w+1);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
69 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
70 {
71   PetscErrorCode ierr;
72   PetscInt       i,k,cnt = 1;
73 
74   PetscFunctionBegin;
75 
76   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
77    columns to the left with any nonzeros in them plus 1 */
78   ierr = PetscCalloc1(dd->w,&dd->ofillcols);CHKERRQ(ierr);
79   for (i=0; i<dd->w; i++) {
80     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
81   }
82   for (i=0; i<dd->w; i++) {
83     if (dd->ofillcols[i]) {
84       dd->ofillcols[i] = cnt++;
85     }
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 /*@
91     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
92     of the matrix returned by DMCreateMatrix().
93 
94     Logically Collective on da
95 
96     Input Parameters:
97 +   da - the distributed array
98 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
99 -   ofill - the fill pattern in the off-diagonal blocks
100 
101     Level: developer
102 
103     Notes:
104     This only makes sense when you are doing multicomponent problems but using the
105        MPIAIJ matrix format
106 
107            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
108        representing coupling and 0 entries for missing coupling. For example
109 $             dfill[9] = {1, 0, 0,
110 $                         1, 1, 0,
111 $                         0, 1, 1}
112        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
113        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
114        diagonal block).
115 
116      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
117      can be represented in the dfill, ofill format
118 
119    Contributed by Glenn Hammond
120 
121 .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
122 
123 @*/
124 PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
125 {
126   DM_DA          *dd = (DM_DA*)da->data;
127   PetscErrorCode ierr;
128 
129   PetscFunctionBegin;
130   /* save the given dfill and ofill information */
131   ierr = DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);CHKERRQ(ierr);
132   ierr = DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);CHKERRQ(ierr);
133 
134   /* count nonzeros in ofill columns */
135   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
136 
137   PetscFunctionReturn(0);
138 }
139 
140 /*@
141     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
142     of the matrix returned by DMCreateMatrix(), using sparse representations
143     of fill patterns.
144 
145     Logically Collective on da
146 
147     Input Parameters:
148 +   da - the distributed array
149 .   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
150 -   ofill - the sparse fill pattern in the off-diagonal blocks
151 
152     Level: developer
153 
154     Notes: This only makes sense when you are doing multicomponent problems but using the
155        MPIAIJ matrix format
156 
157            The format for dfill and ofill is a sparse representation of a
158            dof-by-dof matrix with 1 entries representing coupling and 0 entries
159            for missing coupling.  The sparse representation is a 1 dimensional
160            array of length nz + dof + 1, where nz is the number of non-zeros in
161            the matrix.  The first dof entries in the array give the
162            starting array indices of each row's items in the rest of the array,
163            the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
164            and the remaining nz items give the column indices of each of
165            the 1s within the logical 2D matrix.  Each row's items within
166            the array are the column indices of the 1s within that row
167            of the 2D matrix.  PETSc developers may recognize that this is the
168            same format as that computed by the DMDASetBlockFills_Private()
169            function from a dense 2D matrix representation.
170 
171      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then
172      can be represented in the dfill, ofill format
173 
174    Contributed by Philip C. Roth
175 
176 .seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()
177 
178 @*/
179 PetscErrorCode  DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
180 {
181   DM_DA          *dd = (DM_DA*)da->data;
182   PetscErrorCode ierr;
183 
184   PetscFunctionBegin;
185   /* save the given dfill and ofill information */
186   ierr = DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);CHKERRQ(ierr);
187   ierr = DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);CHKERRQ(ierr);
188 
189   /* count nonzeros in ofill columns */
190   ierr = DMDASetBlockFills_Private2(dd);CHKERRQ(ierr);
191 
192   PetscFunctionReturn(0);
193 }
194 
195 PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
196 {
197   PetscErrorCode   ierr;
198   PetscInt         dim,m,n,p,nc;
199   DMBoundaryType   bx,by,bz;
200   MPI_Comm         comm;
201   PetscMPIInt      size;
202   PetscBool        isBAIJ;
203   DM_DA            *dd = (DM_DA*)da->data;
204 
205   PetscFunctionBegin;
206   /*
207                                   m
208           ------------------------------------------------------
209          |                                                     |
210          |                                                     |
211          |               ----------------------                |
212          |               |                    |                |
213       n  |           yn  |                    |                |
214          |               |                    |                |
215          |               .---------------------                |
216          |             (xs,ys)     xn                          |
217          |            .                                        |
218          |         (gxs,gys)                                   |
219          |                                                     |
220           -----------------------------------------------------
221   */
222 
223   /*
224          nc - number of components per grid point
225          col - number of colors needed in one direction for single component problem
226 
227   */
228   ierr = DMDAGetInfo(da,&dim,NULL,NULL,NULL,&m,&n,&p,&nc,NULL,&bx,&by,&bz,NULL);CHKERRQ(ierr);
229 
230   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
231   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
232   if (ctype == IS_COLORING_LOCAL) {
233     if (size == 1) {
234       ctype = IS_COLORING_GLOBAL;
235     } else if (dim > 1) {
236       if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
237         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
238       }
239     }
240   }
241 
242   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
243      matrices is for the blocks, not the individual matrix elements  */
244   ierr = PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ);CHKERRQ(ierr);
245   if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr);}
246   if (!isBAIJ) {ierr = PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr);}
247   if (isBAIJ) {
248     dd->w  = 1;
249     dd->xs = dd->xs/nc;
250     dd->xe = dd->xe/nc;
251     dd->Xs = dd->Xs/nc;
252     dd->Xe = dd->Xe/nc;
253   }
254 
255   /*
256      We do not provide a getcoloring function in the DMDA operations because
257    the basic DMDA does not know about matrices. We think of DMDA as being
258    more low-level then matrices.
259   */
260   if (dim == 1) {
261     ierr = DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
262   } else if (dim == 2) {
263     ierr = DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
264   } else if (dim == 3) {
265     ierr = DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
266   } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
267   if (isBAIJ) {
268     dd->w  = nc;
269     dd->xs = dd->xs*nc;
270     dd->xe = dd->xe*nc;
271     dd->Xs = dd->Xs*nc;
272     dd->Xe = dd->Xe*nc;
273   }
274   PetscFunctionReturn(0);
275 }
276 
277 /* ---------------------------------------------------------------------------------*/
278 
279 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
280 {
281   PetscErrorCode  ierr;
282   PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
283   PetscInt        ncolors;
284   MPI_Comm        comm;
285   DMBoundaryType  bx,by;
286   DMDAStencilType st;
287   ISColoringValue *colors;
288   DM_DA           *dd = (DM_DA*)da->data;
289 
290   PetscFunctionBegin;
291   /*
292          nc - number of components per grid point
293          col - number of colors needed in one direction for single component problem
294 
295   */
296   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
297   col  = 2*s + 1;
298   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
299   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
300   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
301 
302   /* special case as taught to us by Paul Hovland */
303   if (st == DMDA_STENCIL_STAR && s == 1) {
304     ierr = DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);CHKERRQ(ierr);
305   } else {
306     if (ctype == IS_COLORING_GLOBAL) {
307       if (!dd->localcoloring) {
308         ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
309         ii   = 0;
310         for (j=ys; j<ys+ny; j++) {
311           for (i=xs; i<xs+nx; i++) {
312             for (k=0; k<nc; k++) {
313               colors[ii++] = k + nc*((i % col) + col*(j % col));
314             }
315           }
316         }
317         ncolors = nc + nc*(col-1 + col*(col-1));
318         ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
319       }
320       *coloring = dd->localcoloring;
321     } else if (ctype == IS_COLORING_LOCAL) {
322       if (!dd->ghostedcoloring) {
323         ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
324         ii   = 0;
325         for (j=gys; j<gys+gny; j++) {
326           for (i=gxs; i<gxs+gnx; i++) {
327             for (k=0; k<nc; k++) {
328               /* the complicated stuff is to handle periodic boundaries */
329               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
330             }
331           }
332         }
333         ncolors = nc + nc*(col - 1 + col*(col-1));
334         ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
335         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
336 
337         ierr = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
338       }
339       *coloring = dd->ghostedcoloring;
340     } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
341   }
342   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
343   PetscFunctionReturn(0);
344 }
345 
346 /* ---------------------------------------------------------------------------------*/
347 
348 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
349 {
350   PetscErrorCode  ierr;
351   PetscInt        xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
352   PetscInt        ncolors;
353   MPI_Comm        comm;
354   DMBoundaryType  bx,by,bz;
355   DMDAStencilType st;
356   ISColoringValue *colors;
357   DM_DA           *dd = (DM_DA*)da->data;
358 
359   PetscFunctionBegin;
360   /*
361          nc - number of components per grid point
362          col - number of colors needed in one direction for single component problem
363 
364   */
365   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
366   col  = 2*s + 1;
367   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
368   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
369   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
370 
371   /* create the coloring */
372   if (ctype == IS_COLORING_GLOBAL) {
373     if (!dd->localcoloring) {
374       ierr = PetscMalloc1(nc*nx*ny*nz,&colors);CHKERRQ(ierr);
375       ii   = 0;
376       for (k=zs; k<zs+nz; k++) {
377         for (j=ys; j<ys+ny; j++) {
378           for (i=xs; i<xs+nx; i++) {
379             for (l=0; l<nc; l++) {
380               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
381             }
382           }
383         }
384       }
385       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
386       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
387     }
388     *coloring = dd->localcoloring;
389   } else if (ctype == IS_COLORING_LOCAL) {
390     if (!dd->ghostedcoloring) {
391       ierr = PetscMalloc1(nc*gnx*gny*gnz,&colors);CHKERRQ(ierr);
392       ii   = 0;
393       for (k=gzs; k<gzs+gnz; k++) {
394         for (j=gys; j<gys+gny; j++) {
395           for (i=gxs; i<gxs+gnx; i++) {
396             for (l=0; l<nc; l++) {
397               /* the complicated stuff is to handle periodic boundaries */
398               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
399             }
400           }
401         }
402       }
403       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
404       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
405       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
406     }
407     *coloring = dd->ghostedcoloring;
408   } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
409   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
410   PetscFunctionReturn(0);
411 }
412 
413 /* ---------------------------------------------------------------------------------*/
414 
415 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
416 {
417   PetscErrorCode  ierr;
418   PetscInt        xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
419   PetscInt        ncolors;
420   MPI_Comm        comm;
421   DMBoundaryType  bx;
422   ISColoringValue *colors;
423   DM_DA           *dd = (DM_DA*)da->data;
424 
425   PetscFunctionBegin;
426   /*
427          nc - number of components per grid point
428          col - number of colors needed in one direction for single component problem
429 
430   */
431   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,&M,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
432   col  = 2*s + 1;
433   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
434   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
435   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
436 
437   /* create the coloring */
438   if (ctype == IS_COLORING_GLOBAL) {
439     if (!dd->localcoloring) {
440       ierr = PetscMalloc1(nc*nx,&colors);CHKERRQ(ierr);
441       if (dd->ofillcols) {
442         PetscInt tc = 0;
443         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
444         i1 = 0;
445         for (i=xs; i<xs+nx; i++) {
446           for (l=0; l<nc; l++) {
447             if (dd->ofillcols[l] && (i % col)) {
448               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
449             } else {
450               colors[i1++] = l;
451             }
452           }
453         }
454         ncolors = nc + 2*s*tc;
455       } else {
456         i1 = 0;
457         for (i=xs; i<xs+nx; i++) {
458           for (l=0; l<nc; l++) {
459             colors[i1++] = l + nc*(i % col);
460           }
461         }
462         ncolors = nc + nc*(col-1);
463       }
464       ierr = ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
465     }
466     *coloring = dd->localcoloring;
467   } else if (ctype == IS_COLORING_LOCAL) {
468     if (!dd->ghostedcoloring) {
469       ierr = PetscMalloc1(nc*gnx,&colors);CHKERRQ(ierr);
470       i1   = 0;
471       for (i=gxs; i<gxs+gnx; i++) {
472         for (l=0; l<nc; l++) {
473           /* the complicated stuff is to handle periodic boundaries */
474           colors[i1++] = l + nc*(SetInRange(i,m) % col);
475         }
476       }
477       ncolors = nc + nc*(col-1);
478       ierr    = ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
479       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
480     }
481     *coloring = dd->ghostedcoloring;
482   } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
483   ierr = ISColoringReference(*coloring);CHKERRQ(ierr);
484   PetscFunctionReturn(0);
485 }
486 
487 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
488 {
489   PetscErrorCode  ierr;
490   PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
491   PetscInt        ncolors;
492   MPI_Comm        comm;
493   DMBoundaryType  bx,by;
494   ISColoringValue *colors;
495   DM_DA           *dd = (DM_DA*)da->data;
496 
497   PetscFunctionBegin;
498   /*
499          nc - number of components per grid point
500          col - number of colors needed in one direction for single component problem
501 
502   */
503   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,NULL);CHKERRQ(ierr);
504   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
505   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
506   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
507   /* create the coloring */
508   if (ctype == IS_COLORING_GLOBAL) {
509     if (!dd->localcoloring) {
510       ierr = PetscMalloc1(nc*nx*ny,&colors);CHKERRQ(ierr);
511       ii   = 0;
512       for (j=ys; j<ys+ny; j++) {
513         for (i=xs; i<xs+nx; i++) {
514           for (k=0; k<nc; k++) {
515             colors[ii++] = k + nc*((3*j+i) % 5);
516           }
517         }
518       }
519       ncolors = 5*nc;
520       ierr    = ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);CHKERRQ(ierr);
521     }
522     *coloring = dd->localcoloring;
523   } else if (ctype == IS_COLORING_LOCAL) {
524     if (!dd->ghostedcoloring) {
525       ierr = PetscMalloc1(nc*gnx*gny,&colors);CHKERRQ(ierr);
526       ii = 0;
527       for (j=gys; j<gys+gny; j++) {
528         for (i=gxs; i<gxs+gnx; i++) {
529           for (k=0; k<nc; k++) {
530             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
531           }
532         }
533       }
534       ncolors = 5*nc;
535       ierr    = ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);CHKERRQ(ierr);
536       ierr    = ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);CHKERRQ(ierr);
537     }
538     *coloring = dd->ghostedcoloring;
539   } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
540   PetscFunctionReturn(0);
541 }
542 
543 /* =========================================================================== */
544 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
545 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
546 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM,Mat);
547 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
548 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
549 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
550 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
551 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
552 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
553 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
554 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
555 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
556 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
557 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);
558 
559 /*@C
560    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix
561 
562    Logically Collective on mat
563 
564    Input Parameters:
565 +  mat - the matrix
566 -  da - the da
567 
568    Level: intermediate
569 
570 @*/
571 PetscErrorCode MatSetupDM(Mat mat,DM da)
572 {
573   PetscErrorCode ierr;
574 
575   PetscFunctionBegin;
576   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
577   PetscValidHeaderSpecificType(da,DM_CLASSID,2,DMDA);
578   ierr = PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));CHKERRQ(ierr);
579   PetscFunctionReturn(0);
580 }
581 
582 PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
583 {
584   DM                da;
585   PetscErrorCode    ierr;
586   const char        *prefix;
587   Mat               Anatural;
588   AO                ao;
589   PetscInt          rstart,rend,*petsc,i;
590   IS                is;
591   MPI_Comm          comm;
592   PetscViewerFormat format;
593 
594   PetscFunctionBegin;
595   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
596   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
597   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
598 
599   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
600   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
601   PetscCheckFalse(!da,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
602 
603   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
604   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
605   ierr = PetscMalloc1(rend-rstart,&petsc);CHKERRQ(ierr);
606   for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
607   ierr = AOApplicationToPetsc(ao,rend-rstart,petsc);CHKERRQ(ierr);
608   ierr = ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
609 
610   /* call viewer on natural ordering */
611   ierr = MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);CHKERRQ(ierr);
612   ierr = ISDestroy(&is);CHKERRQ(ierr);
613   ierr = PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);CHKERRQ(ierr);
614   ierr = PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);CHKERRQ(ierr);
615   ierr = PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);CHKERRQ(ierr);
616   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
617   ierr = MatView(Anatural,viewer);CHKERRQ(ierr);
618   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
619   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
620   PetscFunctionReturn(0);
621 }
622 
623 PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
624 {
625   DM             da;
626   PetscErrorCode ierr;
627   Mat            Anatural,Aapp;
628   AO             ao;
629   PetscInt       rstart,rend,*app,i,m,n,M,N;
630   IS             is;
631   MPI_Comm       comm;
632 
633   PetscFunctionBegin;
634   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
635   ierr = MatGetDM(A, &da);CHKERRQ(ierr);
636   PetscCheckFalse(!da,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");
637 
638   /* Load the matrix in natural ordering */
639   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Anatural);CHKERRQ(ierr);
640   ierr = MatSetType(Anatural,((PetscObject)A)->type_name);CHKERRQ(ierr);
641   ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
642   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
643   ierr = MatSetSizes(Anatural,m,n,M,N);CHKERRQ(ierr);
644   ierr = MatLoad(Anatural,viewer);CHKERRQ(ierr);
645 
646   /* Map natural ordering to application ordering and create IS */
647   ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
648   ierr = MatGetOwnershipRange(Anatural,&rstart,&rend);CHKERRQ(ierr);
649   ierr = PetscMalloc1(rend-rstart,&app);CHKERRQ(ierr);
650   for (i=rstart; i<rend; i++) app[i-rstart] = i;
651   ierr = AOPetscToApplication(ao,rend-rstart,app);CHKERRQ(ierr);
652   ierr = ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
653 
654   /* Do permutation and replace header */
655   ierr = MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);CHKERRQ(ierr);
656   ierr = MatHeaderReplace(A,&Aapp);CHKERRQ(ierr);
657   ierr = ISDestroy(&is);CHKERRQ(ierr);
658   ierr = MatDestroy(&Anatural);CHKERRQ(ierr);
659   PetscFunctionReturn(0);
660 }
661 
662 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
663 {
664   PetscErrorCode ierr;
665   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
666   Mat            A;
667   MPI_Comm       comm;
668   MatType        Atype;
669   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
670   MatType        mtype;
671   PetscMPIInt    size;
672   DM_DA          *dd = (DM_DA*)da->data;
673 
674   PetscFunctionBegin;
675   ierr = MatInitializePackage();CHKERRQ(ierr);
676   mtype = da->mattype;
677 
678   /*
679                                   m
680           ------------------------------------------------------
681          |                                                     |
682          |                                                     |
683          |               ----------------------                |
684          |               |                    |                |
685       n  |           ny  |                    |                |
686          |               |                    |                |
687          |               .---------------------                |
688          |             (xs,ys)     nx                          |
689          |            .                                        |
690          |         (gxs,gys)                                   |
691          |                                                     |
692           -----------------------------------------------------
693   */
694 
695   /*
696          nc - number of components per grid point
697          col - number of colors needed in one direction for single component problem
698 
699   */
700   M   = dd->M;
701   N   = dd->N;
702   P   = dd->P;
703   dim = da->dim;
704   dof = dd->w;
705   /* ierr = DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); */
706   ierr = DMDAGetCorners(da,NULL,NULL,NULL,&nx,&ny,&nz);CHKERRQ(ierr);
707   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
708   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
709   ierr = MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);CHKERRQ(ierr);
710   ierr = MatSetType(A,mtype);CHKERRQ(ierr);
711   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
712   if (dof*nx*ny*nz < da->bind_below) {
713     ierr = MatSetBindingPropagates(A,PETSC_TRUE);CHKERRQ(ierr);
714     ierr = MatBindToCPU(A,PETSC_TRUE);CHKERRQ(ierr);
715   }
716   ierr = MatSetDM(A,da);CHKERRQ(ierr);
717   if (da->structure_only) {
718     ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr);
719   }
720   ierr = MatGetType(A,&Atype);CHKERRQ(ierr);
721   /*
722      We do not provide a getmatrix function in the DMDA operations because
723    the basic DMDA does not know about matrices. We think of DMDA as being more
724    more low-level than matrices. This is kind of cheating but, cause sometimes
725    we think of DMDA has higher level than matrices.
726 
727      We could switch based on Atype (or mtype), but we do not since the
728    specialized setting routines depend only on the particular preallocation
729    details of the matrix, not the type itself.
730   */
731   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
732   if (!aij) {
733     ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);CHKERRQ(ierr);
734   }
735   if (!aij) {
736     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
737     if (!baij) {
738       ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);CHKERRQ(ierr);
739     }
740     if (!baij) {
741       ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
742       if (!sbaij) {
743         ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);CHKERRQ(ierr);
744       }
745       if (!sbaij) {
746         ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);CHKERRQ(ierr);
747         if (!sell) {
748           ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);CHKERRQ(ierr);
749         }
750       }
751       if (!sell) {
752         ierr = PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);CHKERRQ(ierr);
753       }
754     }
755   }
756   if (aij) {
757     if (dim == 1) {
758       if (dd->ofill) {
759         ierr = DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
760       } else {
761         DMBoundaryType bx;
762         PetscMPIInt  size;
763         ierr = DMDAGetInfo(da,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
764         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRMPI(ierr);
765         if (size == 1 && bx == DM_BOUNDARY_NONE) {
766           ierr = DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da,A);CHKERRQ(ierr);
767         } else {
768           ierr = DMCreateMatrix_DA_1d_MPIAIJ(da,A);CHKERRQ(ierr);
769         }
770       }
771     } else if (dim == 2) {
772       if (dd->ofill) {
773         ierr = DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
774       } else {
775         ierr = DMCreateMatrix_DA_2d_MPIAIJ(da,A);CHKERRQ(ierr);
776       }
777     } else if (dim == 3) {
778       if (dd->ofill) {
779         ierr = DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);CHKERRQ(ierr);
780       } else {
781         ierr = DMCreateMatrix_DA_3d_MPIAIJ(da,A);CHKERRQ(ierr);
782       }
783     }
784   } else if (baij) {
785     if (dim == 2) {
786       ierr = DMCreateMatrix_DA_2d_MPIBAIJ(da,A);CHKERRQ(ierr);
787     } else if (dim == 3) {
788       ierr = DMCreateMatrix_DA_3d_MPIBAIJ(da,A);CHKERRQ(ierr);
789     } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
790   } else if (sbaij) {
791     if (dim == 2) {
792       ierr = DMCreateMatrix_DA_2d_MPISBAIJ(da,A);CHKERRQ(ierr);
793     } else if (dim == 3) {
794       ierr = DMCreateMatrix_DA_3d_MPISBAIJ(da,A);CHKERRQ(ierr);
795     } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
796   } else if (sell) {
797      if (dim == 2) {
798        ierr = DMCreateMatrix_DA_2d_MPISELL(da,A);CHKERRQ(ierr);
799      } else if (dim == 3) {
800        ierr = DMCreateMatrix_DA_3d_MPISELL(da,A);CHKERRQ(ierr);
801      } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
802   } else if (is) {
803     ierr = DMCreateMatrix_DA_IS(da,A);CHKERRQ(ierr);
804   } else {
805     ISLocalToGlobalMapping ltog;
806 
807     ierr = MatSetBlockSize(A,dof);CHKERRQ(ierr);
808     ierr = MatSetUp(A);CHKERRQ(ierr);
809     ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
810     ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
811   }
812   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
813   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
814   ierr = MatSetDM(A,da);CHKERRQ(ierr);
815   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
816   if (size > 1) {
817     /* change viewer to display matrix in natural ordering */
818     ierr = MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);CHKERRQ(ierr);
819     ierr = MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);CHKERRQ(ierr);
820   }
821   *J = A;
822   PetscFunctionReturn(0);
823 }
824 
825 /* ---------------------------------------------------------------------------------*/
826 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
827 
828 PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
829 {
830   DM_DA                  *da = (DM_DA*)dm->data;
831   Mat                    lJ,P;
832   ISLocalToGlobalMapping ltog;
833   IS                     is;
834   PetscBT                bt;
835   const PetscInt         *e_loc,*idx;
836   PetscInt               i,nel,nen,nv,dof,*gidx,n,N;
837   PetscErrorCode         ierr;
838 
839   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
840      We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
841   PetscFunctionBegin;
842   dof  = da->w;
843   ierr = MatSetBlockSize(J,dof);CHKERRQ(ierr);
844   ierr = DMGetLocalToGlobalMapping(dm,&ltog);CHKERRQ(ierr);
845 
846   /* flag local elements indices in local DMDA numbering */
847   ierr = ISLocalToGlobalMappingGetSize(ltog,&nv);CHKERRQ(ierr);
848   ierr = PetscBTCreate(nv/dof,&bt);CHKERRQ(ierr);
849   ierr = DMDAGetElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
850   for (i=0;i<nel*nen;i++) { ierr = PetscBTSet(bt,e_loc[i]);CHKERRQ(ierr); }
851 
852   /* filter out (set to -1) the global indices not used by the local elements */
853   ierr = PetscMalloc1(nv/dof,&gidx);CHKERRQ(ierr);
854   ierr = ISLocalToGlobalMappingGetBlockIndices(ltog,&idx);CHKERRQ(ierr);
855   ierr = PetscArraycpy(gidx,idx,nv/dof);CHKERRQ(ierr);
856   ierr = ISLocalToGlobalMappingRestoreBlockIndices(ltog,&idx);CHKERRQ(ierr);
857   for (i=0;i<nv/dof;i++) if (!PetscBTLookup(bt,i)) gidx[i] = -1;
858   ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
859   ierr = ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nv/dof,gidx,PETSC_OWN_POINTER,&is);CHKERRQ(ierr);
860   ierr = ISLocalToGlobalMappingCreateIS(is,&ltog);CHKERRQ(ierr);
861   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
862   ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
863   ierr = ISDestroy(&is);CHKERRQ(ierr);
864 
865   /* Preallocation */
866   ierr = MatISGetLocalMat(J,&lJ);CHKERRQ(ierr);
867   ierr = MatGetLocalToGlobalMapping(lJ,&ltog,NULL);CHKERRQ(ierr);
868   ierr = MatCreate(PetscObjectComm((PetscObject)lJ),&P);CHKERRQ(ierr);
869   ierr = MatSetType(P,MATPREALLOCATOR);CHKERRQ(ierr);
870   ierr = MatSetLocalToGlobalMapping(P,ltog,ltog);CHKERRQ(ierr);
871   ierr = MatGetSize(lJ,&N,NULL);CHKERRQ(ierr);
872   ierr = MatGetLocalSize(lJ,&n,NULL);CHKERRQ(ierr);
873   ierr = MatSetSizes(P,n,n,N,N);CHKERRQ(ierr);
874   ierr = MatSetBlockSize(P,dof);CHKERRQ(ierr);
875   ierr = MatSetUp(P);CHKERRQ(ierr);
876   for (i=0;i<nel;i++) {
877     ierr = MatSetValuesBlockedLocal(P,nen,e_loc+i*nen,nen,e_loc+i*nen,NULL,INSERT_VALUES);CHKERRQ(ierr);
878   }
879   ierr = MatPreallocatorPreallocate(P,(PetscBool)!da->prealloc_only,lJ);CHKERRQ(ierr);
880   ierr = MatISRestoreLocalMat(J,&lJ);CHKERRQ(ierr);
881   ierr = DMDARestoreElements(dm,&nel,&nen,&e_loc);CHKERRQ(ierr);
882   ierr = MatDestroy(&P);CHKERRQ(ierr);
883 
884   ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
885   ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
886   PetscFunctionReturn(0);
887 }
888 
889 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
890 {
891   PetscErrorCode         ierr;
892   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
893   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
894   MPI_Comm               comm;
895   PetscScalar            *values;
896   DMBoundaryType         bx,by;
897   ISLocalToGlobalMapping ltog;
898   DMDAStencilType        st;
899 
900   PetscFunctionBegin;
901   /*
902          nc - number of components per grid point
903          col - number of colors needed in one direction for single component problem
904 
905   */
906   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
907   col  = 2*s + 1;
908   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
909   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
910   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
911 
912   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
913   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
914 
915   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
916   /* determine the matrix preallocation information */
917   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
918   for (i=xs; i<xs+nx; i++) {
919 
920     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
921     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
922 
923     for (j=ys; j<ys+ny; j++) {
924       slot = i - gxs + gnx*(j - gys);
925 
926       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
927       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
928 
929       cnt = 0;
930       for (k=0; k<nc; k++) {
931         for (l=lstart; l<lend+1; l++) {
932           for (p=pstart; p<pend+1; p++) {
933             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
934               cols[cnt++] = k + nc*(slot + gnx*l + p);
935             }
936           }
937         }
938         rows[k] = k + nc*(slot);
939       }
940       ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
941     }
942   }
943   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
944   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
945   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
946   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
947 
948   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
949 
950   /*
951     For each node in the grid: we get the neighbors in the local (on processor ordering
952     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
953     PETSc ordering.
954   */
955   if (!da->prealloc_only) {
956     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
957     for (i=xs; i<xs+nx; i++) {
958 
959       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
960       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
961 
962       for (j=ys; j<ys+ny; j++) {
963         slot = i - gxs + gnx*(j - gys);
964 
965         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
966         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
967 
968         cnt = 0;
969         for (k=0; k<nc; k++) {
970           for (l=lstart; l<lend+1; l++) {
971             for (p=pstart; p<pend+1; p++) {
972               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
973                 cols[cnt++] = k + nc*(slot + gnx*l + p);
974               }
975             }
976           }
977           rows[k] = k + nc*(slot);
978         }
979         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
980       }
981     }
982     ierr = PetscFree(values);CHKERRQ(ierr);
983     /* do not copy values to GPU since they are all zero and not yet needed there */
984     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
985     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
986     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
987     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
988     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
989   }
990   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
991   PetscFunctionReturn(0);
992 }
993 
994 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
995 {
996   PetscErrorCode         ierr;
997   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
998   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
999   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1000   MPI_Comm               comm;
1001   PetscScalar            *values;
1002   DMBoundaryType         bx,by,bz;
1003   ISLocalToGlobalMapping ltog;
1004   DMDAStencilType        st;
1005 
1006   PetscFunctionBegin;
1007   /*
1008          nc - number of components per grid point
1009          col - number of colors needed in one direction for single component problem
1010 
1011   */
1012   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1013   col  = 2*s + 1;
1014   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1015   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1016   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1017 
1018   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1019   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1020 
1021   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1022   /* determine the matrix preallocation information */
1023   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1024   for (i=xs; i<xs+nx; i++) {
1025     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1026     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1027     for (j=ys; j<ys+ny; j++) {
1028       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1029       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1030       for (k=zs; k<zs+nz; k++) {
1031         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1032         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1033 
1034         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1035 
1036         cnt = 0;
1037         for (l=0; l<nc; l++) {
1038           for (ii=istart; ii<iend+1; ii++) {
1039             for (jj=jstart; jj<jend+1; jj++) {
1040               for (kk=kstart; kk<kend+1; kk++) {
1041                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1042                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1043                 }
1044               }
1045             }
1046           }
1047           rows[l] = l + nc*(slot);
1048         }
1049         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1050       }
1051     }
1052   }
1053   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1054   ierr = MatSeqSELLSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1055   ierr = MatMPISELLSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1056   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1057   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1058 
1059   /*
1060     For each node in the grid: we get the neighbors in the local (on processor ordering
1061     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1062     PETSc ordering.
1063   */
1064   if (!da->prealloc_only) {
1065     ierr = PetscCalloc1(col*col*col*nc*nc*nc,&values);CHKERRQ(ierr);
1066     for (i=xs; i<xs+nx; i++) {
1067       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1068       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1069       for (j=ys; j<ys+ny; j++) {
1070         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1071         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1072         for (k=zs; k<zs+nz; k++) {
1073           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1074           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1075 
1076           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1077 
1078           cnt = 0;
1079           for (l=0; l<nc; l++) {
1080             for (ii=istart; ii<iend+1; ii++) {
1081               for (jj=jstart; jj<jend+1; jj++) {
1082                 for (kk=kstart; kk<kend+1; kk++) {
1083                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1084                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1085                   }
1086                 }
1087               }
1088             }
1089             rows[l] = l + nc*(slot);
1090           }
1091           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1092         }
1093       }
1094     }
1095     ierr = PetscFree(values);CHKERRQ(ierr);
1096     /* do not copy values to GPU since they are all zero and not yet needed there */
1097     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1098     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1099     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1100     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1101     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1102   }
1103   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
1108 {
1109   PetscErrorCode         ierr;
1110   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
1111   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1112   MPI_Comm               comm;
1113   DMBoundaryType         bx,by;
1114   ISLocalToGlobalMapping ltog,mltog;
1115   DMDAStencilType        st;
1116   PetscBool              removedups = PETSC_FALSE,alreadyboundtocpu = PETSC_TRUE;
1117 
1118   PetscFunctionBegin;
1119   /*
1120          nc - number of components per grid point
1121          col - number of colors needed in one direction for single component problem
1122 
1123   */
1124   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
1125   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1126     ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
1127   }
1128   col  = 2*s + 1;
1129   /*
1130        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1131        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1132   */
1133   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1134   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1135   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
1136   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
1137   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1138 
1139   ierr = PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);CHKERRQ(ierr);
1140   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1141 
1142   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1143   /* determine the matrix preallocation information */
1144   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1145   for (i=xs; i<xs+nx; i++) {
1146     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1147     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1148 
1149     for (j=ys; j<ys+ny; j++) {
1150       slot = i - gxs + gnx*(j - gys);
1151 
1152       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1153       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1154 
1155       cnt = 0;
1156       for (k=0; k<nc; k++) {
1157         for (l=lstart; l<lend+1; l++) {
1158           for (p=pstart; p<pend+1; p++) {
1159             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1160               cols[cnt++] = k + nc*(slot + gnx*l + p);
1161             }
1162           }
1163         }
1164         rows[k] = k + nc*(slot);
1165       }
1166       if (removedups) {
1167         ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1168       } else {
1169         ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1170       }
1171     }
1172   }
1173   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1174   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1175   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1176   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1177   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1178   if (!mltog) {
1179     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1180   }
1181 
1182   /*
1183     For each node in the grid: we get the neighbors in the local (on processor ordering
1184     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1185     PETSc ordering.
1186   */
1187   if (!da->prealloc_only) {
1188     for (i=xs; i<xs+nx; i++) {
1189 
1190       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1191       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1192 
1193       for (j=ys; j<ys+ny; j++) {
1194         slot = i - gxs + gnx*(j - gys);
1195 
1196         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1197         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1198 
1199         cnt = 0;
1200         for (l=lstart; l<lend+1; l++) {
1201           for (p=pstart; p<pend+1; p++) {
1202             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1203               cols[cnt++] = nc*(slot + gnx*l + p);
1204               for (k=1; k<nc; k++) {
1205                 cols[cnt] = 1 + cols[cnt-1];cnt++;
1206               }
1207             }
1208           }
1209         }
1210         for (k=0; k<nc; k++) rows[k] = k + nc*(slot);
1211         ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1212       }
1213     }
1214     /* do not copy values to GPU since they are all zero and not yet needed there */
1215     ierr = MatBoundToCPU(J,&alreadyboundtocpu);CHKERRQ(ierr);
1216     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1217     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1218     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1219     if (!alreadyboundtocpu) {ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);}
1220     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1221     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1222       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1223     }
1224   }
1225   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
1230 {
1231   PetscErrorCode         ierr;
1232   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1233   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
1234   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1235   DM_DA                  *dd = (DM_DA*)da->data;
1236   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
1237   MPI_Comm               comm;
1238   DMBoundaryType         bx,by;
1239   ISLocalToGlobalMapping ltog;
1240   DMDAStencilType        st;
1241   PetscBool              removedups = PETSC_FALSE;
1242 
1243   PetscFunctionBegin;
1244   /*
1245          nc - number of components per grid point
1246          col - number of colors needed in one direction for single component problem
1247 
1248   */
1249   ierr = DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
1250   col  = 2*s + 1;
1251   /*
1252        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1253        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1254   */
1255   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1256   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1257   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
1258   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
1259   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1260 
1261   ierr = PetscMalloc1(col*col*nc,&cols);CHKERRQ(ierr);
1262   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1263 
1264   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1265   /* determine the matrix preallocation information */
1266   ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);CHKERRQ(ierr);
1267   for (i=xs; i<xs+nx; i++) {
1268 
1269     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1270     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1271 
1272     for (j=ys; j<ys+ny; j++) {
1273       slot = i - gxs + gnx*(j - gys);
1274 
1275       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1276       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1277 
1278       for (k=0; k<nc; k++) {
1279         cnt = 0;
1280         for (l=lstart; l<lend+1; l++) {
1281           for (p=pstart; p<pend+1; p++) {
1282             if (l || p) {
1283               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1284                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1285               }
1286             } else {
1287               if (dfill) {
1288                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1289               } else {
1290                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1291               }
1292             }
1293           }
1294         }
1295         row    = k + nc*(slot);
1296         maxcnt = PetscMax(maxcnt,cnt);
1297         if (removedups) {
1298           ierr   = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1299         } else {
1300           ierr   = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1301         }
1302       }
1303     }
1304   }
1305   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1306   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1307   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1308   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1309 
1310   /*
1311     For each node in the grid: we get the neighbors in the local (on processor ordering
1312     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1313     PETSc ordering.
1314   */
1315   if (!da->prealloc_only) {
1316     for (i=xs; i<xs+nx; i++) {
1317 
1318       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1319       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1320 
1321       for (j=ys; j<ys+ny; j++) {
1322         slot = i - gxs + gnx*(j - gys);
1323 
1324         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1325         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1326 
1327         for (k=0; k<nc; k++) {
1328           cnt = 0;
1329           for (l=lstart; l<lend+1; l++) {
1330             for (p=pstart; p<pend+1; p++) {
1331               if (l || p) {
1332                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1333                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1334                 }
1335               } else {
1336                 if (dfill) {
1337                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1338                 } else {
1339                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1340                 }
1341               }
1342             }
1343           }
1344           row  = k + nc*(slot);
1345           ierr = MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1346         }
1347       }
1348     }
1349     /* do not copy values to GPU since they are all zero and not yet needed there */
1350     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1351     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1352     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1353     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1354     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1355   }
1356   ierr = PetscFree(cols);CHKERRQ(ierr);
1357   PetscFunctionReturn(0);
1358 }
1359 
1360 /* ---------------------------------------------------------------------------------*/
1361 
1362 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
1363 {
1364   PetscErrorCode         ierr;
1365   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1366   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1367   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1368   MPI_Comm               comm;
1369   DMBoundaryType         bx,by,bz;
1370   ISLocalToGlobalMapping ltog,mltog;
1371   DMDAStencilType        st;
1372   PetscBool              removedups = PETSC_FALSE;
1373 
1374   PetscFunctionBegin;
1375   /*
1376          nc - number of components per grid point
1377          col - number of colors needed in one direction for single component problem
1378 
1379   */
1380   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1381   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1382     ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
1383   }
1384   col  = 2*s + 1;
1385 
1386   /*
1387        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1388        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1389   */
1390   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1391   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1392   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
1393 
1394   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1395   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1396   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1397 
1398   ierr = PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);CHKERRQ(ierr);
1399   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1400 
1401   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1402   /* determine the matrix preallocation information */
1403   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1404   for (i=xs; i<xs+nx; i++) {
1405     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1406     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1407     for (j=ys; j<ys+ny; j++) {
1408       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1409       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1410       for (k=zs; k<zs+nz; k++) {
1411         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1412         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1413 
1414         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1415 
1416         cnt = 0;
1417         for (l=0; l<nc; l++) {
1418           for (ii=istart; ii<iend+1; ii++) {
1419             for (jj=jstart; jj<jend+1; jj++) {
1420               for (kk=kstart; kk<kend+1; kk++) {
1421                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1422                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1423                 }
1424               }
1425             }
1426           }
1427           rows[l] = l + nc*(slot);
1428         }
1429         if (removedups) {
1430           ierr = MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1431         } else {
1432           ierr = MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1433         }
1434       }
1435     }
1436   }
1437   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1438   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1439   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1440   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1441   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1442   if (!mltog) {
1443     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1444   }
1445 
1446   /*
1447     For each node in the grid: we get the neighbors in the local (on processor ordering
1448     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1449     PETSc ordering.
1450   */
1451   if (!da->prealloc_only) {
1452     for (i=xs; i<xs+nx; i++) {
1453       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1454       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1455       for (j=ys; j<ys+ny; j++) {
1456         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1457         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1458         for (k=zs; k<zs+nz; k++) {
1459           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1460           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1461 
1462           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1463 
1464           cnt = 0;
1465           for (kk=kstart; kk<kend+1; kk++) {
1466             for (jj=jstart; jj<jend+1; jj++) {
1467               for (ii=istart; ii<iend+1; ii++) {
1468                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1469                   cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk);
1470                     for (l=1; l<nc; l++) {
1471                       cols[cnt] = 1 + cols[cnt-1];cnt++;
1472                   }
1473                 }
1474               }
1475             }
1476           }
1477           rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1478           ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1479         }
1480       }
1481     }
1482     /* do not copy values to GPU since they are all zero and not yet needed there */
1483     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1484     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1485     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1486     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1487       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1488     }
1489     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1490     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1491   }
1492   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1493   PetscFunctionReturn(0);
1494 }
1495 
1496 /* ---------------------------------------------------------------------------------*/
1497 
1498 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1499 {
1500   PetscErrorCode         ierr;
1501   DM_DA                  *dd = (DM_DA*)da->data;
1502   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1503   PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
1504   PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
1505   DMBoundaryType         bx;
1506   ISLocalToGlobalMapping ltog;
1507   PetscMPIInt            rank,size;
1508 
1509   PetscFunctionBegin;
1510   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRMPI(ierr);
1511   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRMPI(ierr);
1512 
1513   /*
1514          nc - number of components per grid point
1515 
1516   */
1517   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
1518   PetscCheckFalse(s > 1,PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1519   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
1520   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
1521 
1522   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1523   ierr = PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);CHKERRQ(ierr);
1524 
1525   /*
1526         note should be smaller for first and last process with no periodic
1527         does not handle dfill
1528   */
1529   cnt = 0;
1530   /* coupling with process to the left */
1531   for (i=0; i<s; i++) {
1532     for (j=0; j<nc; j++) {
1533       ocols[cnt] = ((rank == 0) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1534       cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1535       if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1536         if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1537         else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1538       }
1539       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1540       cnt++;
1541     }
1542   }
1543   for (i=s; i<nx-s; i++) {
1544     for (j=0; j<nc; j++) {
1545       cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1546       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1547       cnt++;
1548     }
1549   }
1550   /* coupling with process to the right */
1551   for (i=nx-s; i<nx; i++) {
1552     for (j=0; j<nc; j++) {
1553       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1554       cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1555       if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1556         if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1557         else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1558       }
1559       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1560       cnt++;
1561     }
1562   }
1563 
1564   ierr = MatSeqAIJSetPreallocation(J,0,cols);CHKERRQ(ierr);
1565   ierr = MatMPIAIJSetPreallocation(J,0,cols,0,ocols);CHKERRQ(ierr);
1566   ierr = PetscFree2(cols,ocols);CHKERRQ(ierr);
1567 
1568   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1569   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1570 
1571   /*
1572     For each node in the grid: we get the neighbors in the local (on processor ordering
1573     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1574     PETSc ordering.
1575   */
1576   if (!da->prealloc_only) {
1577     ierr = PetscMalloc1(maxcnt,&cols);CHKERRQ(ierr);
1578     row = xs*nc;
1579     /* coupling with process to the left */
1580     for (i=xs; i<xs+s; i++) {
1581       for (j=0; j<nc; j++) {
1582         cnt = 0;
1583         if (rank) {
1584           for (l=0; l<s; l++) {
1585             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1586           }
1587         }
1588         if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1589           for (l=0; l<s; l++) {
1590             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k];
1591           }
1592         }
1593         if (dfill) {
1594           for (k=dfill[j]; k<dfill[j+1]; k++) {
1595             cols[cnt++] = i*nc + dfill[k];
1596           }
1597         } else {
1598           for (k=0; k<nc; k++) {
1599             cols[cnt++] = i*nc + k;
1600           }
1601         }
1602         for (l=0; l<s; l++) {
1603           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1604         }
1605         ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1606         row++;
1607       }
1608     }
1609     for (i=xs+s; i<xs+nx-s; i++) {
1610       for (j=0; j<nc; j++) {
1611         cnt = 0;
1612         for (l=0; l<s; l++) {
1613           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1614         }
1615         if (dfill) {
1616           for (k=dfill[j]; k<dfill[j+1]; k++) {
1617             cols[cnt++] = i*nc + dfill[k];
1618           }
1619         } else {
1620           for (k=0; k<nc; k++) {
1621             cols[cnt++] = i*nc + k;
1622           }
1623         }
1624         for (l=0; l<s; l++) {
1625           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1626         }
1627         ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1628         row++;
1629       }
1630     }
1631     /* coupling with process to the right */
1632     for (i=xs+nx-s; i<xs+nx; i++) {
1633       for (j=0; j<nc; j++) {
1634         cnt = 0;
1635         for (l=0; l<s; l++) {
1636           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1637         }
1638         if (dfill) {
1639           for (k=dfill[j]; k<dfill[j+1]; k++) {
1640             cols[cnt++] = i*nc + dfill[k];
1641           }
1642         } else {
1643           for (k=0; k<nc; k++) {
1644             cols[cnt++] = i*nc + k;
1645           }
1646         }
1647         if (rank < size-1) {
1648           for (l=0; l<s; l++) {
1649             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1650           }
1651         }
1652         if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1653           for (l=0; l<s; l++) {
1654             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k];
1655           }
1656         }
1657         ierr = MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1658         row++;
1659       }
1660     }
1661     ierr = PetscFree(cols);CHKERRQ(ierr);
1662     /* do not copy values to GPU since they are all zero and not yet needed there */
1663     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1664     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1665     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1666     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1667     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1668   }
1669   PetscFunctionReturn(0);
1670 }
1671 
1672 /* ---------------------------------------------------------------------------------*/
1673 
1674 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
1675 {
1676   PetscErrorCode         ierr;
1677   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1678   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1679   PetscInt               istart,iend;
1680   DMBoundaryType         bx;
1681   ISLocalToGlobalMapping ltog,mltog;
1682 
1683   PetscFunctionBegin;
1684   /*
1685          nc - number of components per grid point
1686          col - number of colors needed in one direction for single component problem
1687 
1688   */
1689   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
1690   if (bx == DM_BOUNDARY_NONE) {
1691     ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);CHKERRQ(ierr);
1692   }
1693   col  = 2*s + 1;
1694 
1695   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
1696   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
1697 
1698   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1699   ierr = MatSeqAIJSetPreallocation(J,col*nc,NULL);CHKERRQ(ierr);
1700   ierr = MatMPIAIJSetPreallocation(J,col*nc,NULL,col*nc,NULL);CHKERRQ(ierr);
1701 
1702   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1703   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1704   if (!mltog) {
1705     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1706   }
1707 
1708   /*
1709     For each node in the grid: we get the neighbors in the local (on processor ordering
1710     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1711     PETSc ordering.
1712   */
1713   if (!da->prealloc_only) {
1714     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1715     for (i=xs; i<xs+nx; i++) {
1716       istart = PetscMax(-s,gxs - i);
1717       iend   = PetscMin(s,gxs + gnx - i - 1);
1718       slot   = i - gxs;
1719 
1720       cnt = 0;
1721       for (i1=istart; i1<iend+1; i1++) {
1722         cols[cnt++] = nc*(slot + i1);
1723         for (l=1; l<nc; l++) {
1724           cols[cnt] = 1 + cols[cnt-1];cnt++;
1725         }
1726       }
1727       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1728       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1729     }
1730     /* do not copy values to GPU since they are all zero and not yet needed there */
1731     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1732     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1733     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1734     if (bx == DM_BOUNDARY_NONE) {
1735       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1736     }
1737     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1738     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1739     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1740   }
1741   PetscFunctionReturn(0);
1742 }
1743 
1744 /* ---------------------------------------------------------------------------------*/
1745 
1746 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da,Mat J)
1747 {
1748   PetscErrorCode         ierr;
1749   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1750   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1751   PetscInt               istart,iend;
1752   DMBoundaryType         bx;
1753   ISLocalToGlobalMapping ltog,mltog;
1754 
1755   PetscFunctionBegin;
1756   /*
1757          nc - number of components per grid point
1758          col - number of colors needed in one direction for single component problem
1759   */
1760   ierr = DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);CHKERRQ(ierr);
1761   col  = 2*s + 1;
1762 
1763   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);CHKERRQ(ierr);
1764   ierr = DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);CHKERRQ(ierr);
1765 
1766   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
1767   ierr = MatSeqAIJSetTotalPreallocation(J,nx*nc*col*nc);CHKERRQ(ierr);
1768 
1769   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1770   ierr = MatGetLocalToGlobalMapping(J,&mltog,NULL);CHKERRQ(ierr);
1771   if (!mltog) {
1772     ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1773   }
1774 
1775   /*
1776     For each node in the grid: we get the neighbors in the local (on processor ordering
1777     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1778     PETSc ordering.
1779   */
1780   if (!da->prealloc_only) {
1781     ierr = PetscMalloc2(nc,&rows,col*nc*nc,&cols);CHKERRQ(ierr);
1782     for (i=xs; i<xs+nx; i++) {
1783       istart = PetscMax(-s,gxs - i);
1784       iend   = PetscMin(s,gxs + gnx - i - 1);
1785       slot   = i - gxs;
1786 
1787       cnt = 0;
1788       for (i1=istart; i1<iend+1; i1++) {
1789         cols[cnt++] = nc*(slot + i1);
1790         for (l=1; l<nc; l++) {
1791           cols[cnt] = 1 + cols[cnt-1];cnt++;
1792         }
1793       }
1794       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1795       ierr = MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
1796     }
1797     /* do not copy values to GPU since they are all zero and not yet needed there */
1798     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1799     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1800     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1801     if (bx == DM_BOUNDARY_NONE) {
1802       ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1803     }
1804     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1805     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1806     ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1807   }
1808   ierr = MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);CHKERRQ(ierr);
1809   PetscFunctionReturn(0);
1810 }
1811 
1812 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1813 {
1814   PetscErrorCode         ierr;
1815   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1816   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1817   PetscInt               istart,iend,jstart,jend,ii,jj;
1818   MPI_Comm               comm;
1819   PetscScalar            *values;
1820   DMBoundaryType         bx,by;
1821   DMDAStencilType        st;
1822   ISLocalToGlobalMapping ltog;
1823 
1824   PetscFunctionBegin;
1825   /*
1826      nc - number of components per grid point
1827      col - number of colors needed in one direction for single component problem
1828   */
1829   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
1830   col  = 2*s + 1;
1831 
1832   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
1833   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
1834   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1835 
1836   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
1837 
1838   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1839 
1840   /* determine the matrix preallocation information */
1841   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
1842   for (i=xs; i<xs+nx; i++) {
1843     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1844     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1845     for (j=ys; j<ys+ny; j++) {
1846       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1847       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1848       slot   = i - gxs + gnx*(j - gys);
1849 
1850       /* Find block columns in block row */
1851       cnt = 0;
1852       for (ii=istart; ii<iend+1; ii++) {
1853         for (jj=jstart; jj<jend+1; jj++) {
1854           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1855             cols[cnt++] = slot + ii + gnx*jj;
1856           }
1857         }
1858       }
1859       ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1860     }
1861   }
1862   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1863   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1864   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1865 
1866   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1867 
1868   /*
1869     For each node in the grid: we get the neighbors in the local (on processor ordering
1870     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1871     PETSc ordering.
1872   */
1873   if (!da->prealloc_only) {
1874     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
1875     for (i=xs; i<xs+nx; i++) {
1876       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1877       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1878       for (j=ys; j<ys+ny; j++) {
1879         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1880         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1881         slot = i - gxs + gnx*(j - gys);
1882         cnt  = 0;
1883         for (ii=istart; ii<iend+1; ii++) {
1884           for (jj=jstart; jj<jend+1; jj++) {
1885             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1886               cols[cnt++] = slot + ii + gnx*jj;
1887             }
1888           }
1889         }
1890         ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1891       }
1892     }
1893     ierr = PetscFree(values);CHKERRQ(ierr);
1894     /* do not copy values to GPU since they are all zero and not yet needed there */
1895     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
1896     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1897     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1898     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
1899     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1900   }
1901   ierr = PetscFree(cols);CHKERRQ(ierr);
1902   PetscFunctionReturn(0);
1903 }
1904 
1905 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1906 {
1907   PetscErrorCode         ierr;
1908   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1909   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1910   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1911   MPI_Comm               comm;
1912   PetscScalar            *values;
1913   DMBoundaryType         bx,by,bz;
1914   DMDAStencilType        st;
1915   ISLocalToGlobalMapping ltog;
1916 
1917   PetscFunctionBegin;
1918   /*
1919          nc - number of components per grid point
1920          col - number of colors needed in one direction for single component problem
1921 
1922   */
1923   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
1924   col  = 2*s + 1;
1925 
1926   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
1927   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
1928   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
1929 
1930   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
1931 
1932   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1933 
1934   /* determine the matrix preallocation information */
1935   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
1936   for (i=xs; i<xs+nx; i++) {
1937     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1938     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1939     for (j=ys; j<ys+ny; j++) {
1940       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1941       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1942       for (k=zs; k<zs+nz; k++) {
1943         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1944         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1945 
1946         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1947 
1948         /* Find block columns in block row */
1949         cnt = 0;
1950         for (ii=istart; ii<iend+1; ii++) {
1951           for (jj=jstart; jj<jend+1; jj++) {
1952             for (kk=kstart; kk<kend+1; kk++) {
1953               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1954                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1955               }
1956             }
1957           }
1958         }
1959         ierr = MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
1960       }
1961     }
1962   }
1963   ierr = MatSeqBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
1964   ierr = MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
1965   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1966 
1967   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
1968 
1969   /*
1970     For each node in the grid: we get the neighbors in the local (on processor ordering
1971     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1972     PETSc ordering.
1973   */
1974   if (!da->prealloc_only) {
1975     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
1976     for (i=xs; i<xs+nx; i++) {
1977       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1978       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1979       for (j=ys; j<ys+ny; j++) {
1980         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1981         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1982         for (k=zs; k<zs+nz; k++) {
1983           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1984           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
1985 
1986           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
1987 
1988           cnt = 0;
1989           for (ii=istart; ii<iend+1; ii++) {
1990             for (jj=jstart; jj<jend+1; jj++) {
1991               for (kk=kstart; kk<kend+1; kk++) {
1992                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1993                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1994                 }
1995               }
1996             }
1997           }
1998           ierr = MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
1999         }
2000       }
2001     }
2002     ierr = PetscFree(values);CHKERRQ(ierr);
2003     /* do not copy values to GPU since they are all zero and not yet needed there */
2004     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2005     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2006     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2007     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2008     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2009   }
2010   ierr = PetscFree(cols);CHKERRQ(ierr);
2011   PetscFunctionReturn(0);
2012 }
2013 
2014 /*
2015   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
2016   identify in the local ordering with periodic domain.
2017 */
2018 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
2019 {
2020   PetscErrorCode ierr;
2021   PetscInt       i,n;
2022 
2023   PetscFunctionBegin;
2024   ierr = ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);CHKERRQ(ierr);
2025   ierr = ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);CHKERRQ(ierr);
2026   for (i=0,n=0; i<*cnt; i++) {
2027     if (col[i] >= *row) col[n++] = col[i];
2028   }
2029   *cnt = n;
2030   PetscFunctionReturn(0);
2031 }
2032 
2033 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
2034 {
2035   PetscErrorCode         ierr;
2036   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2037   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
2038   PetscInt               istart,iend,jstart,jend,ii,jj;
2039   MPI_Comm               comm;
2040   PetscScalar            *values;
2041   DMBoundaryType         bx,by;
2042   DMDAStencilType        st;
2043   ISLocalToGlobalMapping ltog;
2044 
2045   PetscFunctionBegin;
2046   /*
2047      nc - number of components per grid point
2048      col - number of colors needed in one direction for single component problem
2049   */
2050   ierr = DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);CHKERRQ(ierr);
2051   col  = 2*s + 1;
2052 
2053   ierr = DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);CHKERRQ(ierr);
2054   ierr = DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);CHKERRQ(ierr);
2055   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2056 
2057   ierr = PetscMalloc1(col*col*nc*nc,&cols);CHKERRQ(ierr);
2058 
2059   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2060 
2061   /* determine the matrix preallocation information */
2062   ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);CHKERRQ(ierr);
2063   for (i=xs; i<xs+nx; i++) {
2064     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2065     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2066     for (j=ys; j<ys+ny; j++) {
2067       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2068       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2069       slot   = i - gxs + gnx*(j - gys);
2070 
2071       /* Find block columns in block row */
2072       cnt = 0;
2073       for (ii=istart; ii<iend+1; ii++) {
2074         for (jj=jstart; jj<jend+1; jj++) {
2075           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2076             cols[cnt++] = slot + ii + gnx*jj;
2077           }
2078         }
2079       }
2080       ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2081       ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2082     }
2083   }
2084   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2085   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2086   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2087 
2088   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2089 
2090   /*
2091     For each node in the grid: we get the neighbors in the local (on processor ordering
2092     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2093     PETSc ordering.
2094   */
2095   if (!da->prealloc_only) {
2096     ierr = PetscCalloc1(col*col*nc*nc,&values);CHKERRQ(ierr);
2097     for (i=xs; i<xs+nx; i++) {
2098       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2099       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2100       for (j=ys; j<ys+ny; j++) {
2101         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2102         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2103         slot   = i - gxs + gnx*(j - gys);
2104 
2105         /* Find block columns in block row */
2106         cnt = 0;
2107         for (ii=istart; ii<iend+1; ii++) {
2108           for (jj=jstart; jj<jend+1; jj++) {
2109             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2110               cols[cnt++] = slot + ii + gnx*jj;
2111             }
2112           }
2113         }
2114         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2115         ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2116       }
2117     }
2118     ierr = PetscFree(values);CHKERRQ(ierr);
2119     /* do not copy values to GPU since they are all zero and not yet needed there */
2120     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2121     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2122     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2123     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2124     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2125   }
2126   ierr = PetscFree(cols);CHKERRQ(ierr);
2127   PetscFunctionReturn(0);
2128 }
2129 
2130 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
2131 {
2132   PetscErrorCode         ierr;
2133   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2134   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
2135   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
2136   MPI_Comm               comm;
2137   PetscScalar            *values;
2138   DMBoundaryType         bx,by,bz;
2139   DMDAStencilType        st;
2140   ISLocalToGlobalMapping ltog;
2141 
2142   PetscFunctionBegin;
2143   /*
2144      nc - number of components per grid point
2145      col - number of colors needed in one direction for single component problem
2146   */
2147   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2148   col  = 2*s + 1;
2149 
2150   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2151   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2152   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2153 
2154   /* create the matrix */
2155   ierr = PetscMalloc1(col*col*col,&cols);CHKERRQ(ierr);
2156 
2157   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2158 
2159   /* determine the matrix preallocation information */
2160   ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2161   for (i=xs; i<xs+nx; i++) {
2162     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2163     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2164     for (j=ys; j<ys+ny; j++) {
2165       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2166       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2167       for (k=zs; k<zs+nz; k++) {
2168         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2169         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2170 
2171         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2172 
2173         /* Find block columns in block row */
2174         cnt = 0;
2175         for (ii=istart; ii<iend+1; ii++) {
2176           for (jj=jstart; jj<jend+1; jj++) {
2177             for (kk=kstart; kk<kend+1; kk++) {
2178               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2179                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2180               }
2181             }
2182           }
2183         }
2184         ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2185         ierr = MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);CHKERRQ(ierr);
2186       }
2187     }
2188   }
2189   ierr = MatSeqSBAIJSetPreallocation(J,nc,0,dnz);CHKERRQ(ierr);
2190   ierr = MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);CHKERRQ(ierr);
2191   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2192 
2193   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2194 
2195   /*
2196     For each node in the grid: we get the neighbors in the local (on processor ordering
2197     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2198     PETSc ordering.
2199   */
2200   if (!da->prealloc_only) {
2201     ierr = PetscCalloc1(col*col*col*nc*nc,&values);CHKERRQ(ierr);
2202     for (i=xs; i<xs+nx; i++) {
2203       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2204       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2205       for (j=ys; j<ys+ny; j++) {
2206         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2207         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2208         for (k=zs; k<zs+nz; k++) {
2209           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2210           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2211 
2212           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2213 
2214           cnt = 0;
2215           for (ii=istart; ii<iend+1; ii++) {
2216             for (jj=jstart; jj<jend+1; jj++) {
2217               for (kk=kstart; kk<kend+1; kk++) {
2218                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2219                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2220                 }
2221               }
2222             }
2223           }
2224           ierr = L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);CHKERRQ(ierr);
2225           ierr = MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2226         }
2227       }
2228     }
2229     ierr = PetscFree(values);CHKERRQ(ierr);
2230     /* do not copy values to GPU since they are all zero and not yet needed there */
2231     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2232     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2233     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2234     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2235     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2236   }
2237   ierr = PetscFree(cols);CHKERRQ(ierr);
2238   PetscFunctionReturn(0);
2239 }
2240 
2241 /* ---------------------------------------------------------------------------------*/
2242 
2243 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
2244 {
2245   PetscErrorCode         ierr;
2246   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2247   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
2248   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
2249   DM_DA                  *dd = (DM_DA*)da->data;
2250   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
2251   MPI_Comm               comm;
2252   PetscScalar            *values;
2253   DMBoundaryType         bx,by,bz;
2254   ISLocalToGlobalMapping ltog;
2255   DMDAStencilType        st;
2256   PetscBool              removedups = PETSC_FALSE;
2257 
2258   PetscFunctionBegin;
2259   /*
2260          nc - number of components per grid point
2261          col - number of colors needed in one direction for single component problem
2262 
2263   */
2264   ierr = DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);CHKERRQ(ierr);
2265   col  = 2*s + 1;
2266   PetscCheckFalse(bx == DM_BOUNDARY_PERIODIC && (m % col),PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
2267                  by 2*stencil_width + 1\n");
2268   PetscCheckFalse(by == DM_BOUNDARY_PERIODIC && (n % col),PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
2269                  by 2*stencil_width + 1\n");
2270   PetscCheckFalse(bz == DM_BOUNDARY_PERIODIC && (p % col),PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
2271                  by 2*stencil_width + 1\n");
2272 
2273   /*
2274        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2275        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2276   */
2277   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
2278   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
2279   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;
2280 
2281   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);CHKERRQ(ierr);
2282   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);CHKERRQ(ierr);
2283   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
2284 
2285   ierr = PetscMalloc1(col*col*col*nc,&cols);CHKERRQ(ierr);
2286   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
2287 
2288   /* determine the matrix preallocation information */
2289   ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);CHKERRQ(ierr);
2290 
2291   ierr = MatSetBlockSize(J,nc);CHKERRQ(ierr);
2292   for (i=xs; i<xs+nx; i++) {
2293     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2294     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2295     for (j=ys; j<ys+ny; j++) {
2296       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2297       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2298       for (k=zs; k<zs+nz; k++) {
2299         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2300         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2301 
2302         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2303 
2304         for (l=0; l<nc; l++) {
2305           cnt = 0;
2306           for (ii=istart; ii<iend+1; ii++) {
2307             for (jj=jstart; jj<jend+1; jj++) {
2308               for (kk=kstart; kk<kend+1; kk++) {
2309                 if (ii || jj || kk) {
2310                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2311                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2312                   }
2313                 } else {
2314                   if (dfill) {
2315                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2316                   } else {
2317                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2318                   }
2319                 }
2320               }
2321             }
2322           }
2323           row  = l + nc*(slot);
2324           maxcnt = PetscMax(maxcnt,cnt);
2325           if (removedups) {
2326             ierr = MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2327           } else {
2328             ierr = MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);CHKERRQ(ierr);
2329           }
2330         }
2331       }
2332     }
2333   }
2334   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
2335   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
2336   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2337   ierr = MatSetLocalToGlobalMapping(J,ltog,ltog);CHKERRQ(ierr);
2338 
2339   /*
2340     For each node in the grid: we get the neighbors in the local (on processor ordering
2341     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2342     PETSc ordering.
2343   */
2344   if (!da->prealloc_only) {
2345     ierr = PetscCalloc1(maxcnt,&values);CHKERRQ(ierr);
2346     for (i=xs; i<xs+nx; i++) {
2347       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2348       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2349       for (j=ys; j<ys+ny; j++) {
2350         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2351         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2352         for (k=zs; k<zs+nz; k++) {
2353           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2354           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));
2355 
2356           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
2357 
2358           for (l=0; l<nc; l++) {
2359             cnt = 0;
2360             for (ii=istart; ii<iend+1; ii++) {
2361               for (jj=jstart; jj<jend+1; jj++) {
2362                 for (kk=kstart; kk<kend+1; kk++) {
2363                   if (ii || jj || kk) {
2364                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2365                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2366                     }
2367                   } else {
2368                     if (dfill) {
2369                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2370                     } else {
2371                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2372                     }
2373                   }
2374                 }
2375               }
2376             }
2377             row  = l + nc*(slot);
2378             ierr = MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);CHKERRQ(ierr);
2379           }
2380         }
2381       }
2382     }
2383     ierr = PetscFree(values);CHKERRQ(ierr);
2384     /* do not copy values to GPU since they are all zero and not yet needed there */
2385     ierr = MatBindToCPU(J,PETSC_TRUE);CHKERRQ(ierr);
2386     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2387     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2388     ierr = MatBindToCPU(J,PETSC_FALSE);CHKERRQ(ierr);
2389     ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2390   }
2391   ierr = PetscFree(cols);CHKERRQ(ierr);
2392   PetscFunctionReturn(0);
2393 }
2394