xref: /petsc/src/dm/impls/da/da2.c (revision 607d249ec66f5be42ddd7f6f35ea64c82fd126db)
1 
2 #include <petsc/private/dmdaimpl.h>    /*I   "petscdmda.h"   I*/
3 #include <petscdraw.h>
4 
5 static PetscErrorCode DMView_DA_2d(DM da,PetscViewer viewer)
6 {
7   PetscErrorCode ierr;
8   PetscMPIInt    rank;
9   PetscBool      iascii,isdraw,isglvis,isbinary;
10   DM_DA          *dd = (DM_DA*)da->data;
11 #if defined(PETSC_HAVE_MATLAB_ENGINE)
12   PetscBool ismatlab;
13 #endif
14 
15   PetscFunctionBegin;
16   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRQ(ierr);
17 
18   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
19   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
20   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERGLVIS,&isglvis);CHKERRQ(ierr);
21   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
22 #if defined(PETSC_HAVE_MATLAB_ENGINE)
23   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERMATLAB,&ismatlab);CHKERRQ(ierr);
24 #endif
25   if (iascii) {
26     PetscViewerFormat format;
27 
28     ierr = PetscViewerGetFormat(viewer, &format);CHKERRQ(ierr);
29     if (format == PETSC_VIEWER_LOAD_BALANCE) {
30       PetscInt      i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal;
31       DMDALocalInfo info;
32       PetscMPIInt   size;
33       ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr);
34       ierr = DMDAGetLocalInfo(da,&info);CHKERRQ(ierr);
35       nzlocal = info.xm*info.ym;
36       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
37       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)da));CHKERRQ(ierr);
38       for (i=0; i<(PetscInt)size; i++) {
39         nmax = PetscMax(nmax,nz[i]);
40         nmin = PetscMin(nmin,nz[i]);
41         navg += nz[i];
42       }
43       ierr = PetscFree(nz);CHKERRQ(ierr);
44       navg = navg/size;
45       ierr = PetscViewerASCIIPrintf(viewer,"  Load Balance - Grid Points: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
46       PetscFunctionReturn(0);
47     }
48     if (format != PETSC_VIEWER_ASCII_VTK && format != PETSC_VIEWER_ASCII_VTK_CELL && format != PETSC_VIEWER_ASCII_GLVIS) {
49       DMDALocalInfo info;
50       ierr = DMDAGetLocalInfo(da,&info);CHKERRQ(ierr);
51       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
52       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Processor [%d] M %D N %D m %D n %D w %D s %D\n",rank,dd->M,dd->N,dd->m,dd->n,dd->w,dd->s);CHKERRQ(ierr);
53       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"X range of indices: %D %D, Y range of indices: %D %D\n",info.xs,info.xs+info.xm,info.ys,info.ys+info.ym);CHKERRQ(ierr);
54       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
55       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
56     } else if (format == PETSC_VIEWER_ASCII_GLVIS) {
57       ierr = DMView_DA_GLVis(da,viewer);CHKERRQ(ierr);
58     } else {
59       ierr = DMView_DA_VTK(da,viewer);CHKERRQ(ierr);
60     }
61   } else if (isdraw) {
62     PetscDraw      draw;
63     double         ymin = -1*dd->s-1,ymax = dd->N+dd->s;
64     double         xmin = -1*dd->s-1,xmax = dd->M+dd->s;
65     double         x,y;
66     PetscInt       base;
67     const PetscInt *idx;
68     char           node[10];
69     PetscBool      isnull;
70 
71     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
72     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
73     if (isnull) PetscFunctionReturn(0);
74 
75     ierr = PetscDrawCheckResizedWindow(draw);CHKERRQ(ierr);
76     ierr = PetscDrawClear(draw);CHKERRQ(ierr);
77     ierr = PetscDrawSetCoordinates(draw,xmin,ymin,xmax,ymax);CHKERRQ(ierr);
78 
79     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
80     /* first processor draw all node lines */
81     if (!rank) {
82       ymin = 0.0; ymax = dd->N - 1;
83       for (xmin=0; xmin<dd->M; xmin++) {
84         ierr = PetscDrawLine(draw,xmin,ymin,xmin,ymax,PETSC_DRAW_BLACK);CHKERRQ(ierr);
85       }
86       xmin = 0.0; xmax = dd->M - 1;
87       for (ymin=0; ymin<dd->N; ymin++) {
88         ierr = PetscDrawLine(draw,xmin,ymin,xmax,ymin,PETSC_DRAW_BLACK);CHKERRQ(ierr);
89       }
90     }
91     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
92     ierr = PetscDrawFlush(draw);CHKERRQ(ierr);
93     ierr = PetscDrawPause(draw);CHKERRQ(ierr);
94 
95     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
96     /* draw my box */
97     xmin = dd->xs/dd->w; xmax =(dd->xe-1)/dd->w; ymin = dd->ys; ymax = dd->ye - 1;
98     ierr = PetscDrawLine(draw,xmin,ymin,xmax,ymin,PETSC_DRAW_RED);CHKERRQ(ierr);
99     ierr = PetscDrawLine(draw,xmin,ymin,xmin,ymax,PETSC_DRAW_RED);CHKERRQ(ierr);
100     ierr = PetscDrawLine(draw,xmin,ymax,xmax,ymax,PETSC_DRAW_RED);CHKERRQ(ierr);
101     ierr = PetscDrawLine(draw,xmax,ymin,xmax,ymax,PETSC_DRAW_RED);CHKERRQ(ierr);
102     /* put in numbers */
103     base = (dd->base)/dd->w;
104     for (y=ymin; y<=ymax; y++) {
105       for (x=xmin; x<=xmax; x++) {
106         ierr = PetscSNPrintf(node,sizeof(node),"%d",(int)base++);CHKERRQ(ierr);
107         ierr = PetscDrawString(draw,x,y,PETSC_DRAW_BLACK,node);CHKERRQ(ierr);
108       }
109     }
110     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
111     ierr = PetscDrawFlush(draw);CHKERRQ(ierr);
112     ierr = PetscDrawPause(draw);CHKERRQ(ierr);
113 
114     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
115     /* overlay ghost numbers, useful for error checking */
116     ierr = ISLocalToGlobalMappingGetBlockIndices(da->ltogmap,&idx);CHKERRQ(ierr);
117     base = 0; xmin = dd->Xs; xmax = dd->Xe; ymin = dd->Ys; ymax = dd->Ye;
118     for (y=ymin; y<ymax; y++) {
119       for (x=xmin; x<xmax; x++) {
120         if ((base % dd->w) == 0) {
121           ierr = PetscSNPrintf(node,sizeof(node),"%d",(int)(idx[base/dd->w]));CHKERRQ(ierr);
122           ierr = PetscDrawString(draw,x/dd->w,y,PETSC_DRAW_BLUE,node);CHKERRQ(ierr);
123         }
124         base++;
125       }
126     }
127     ierr = ISLocalToGlobalMappingRestoreBlockIndices(da->ltogmap,&idx);CHKERRQ(ierr);
128     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
129     ierr = PetscDrawFlush(draw);CHKERRQ(ierr);
130     ierr = PetscDrawPause(draw);CHKERRQ(ierr);
131     ierr = PetscDrawSave(draw);CHKERRQ(ierr);
132   } else if (isglvis) {
133     ierr = DMView_DA_GLVis(da,viewer);CHKERRQ(ierr);
134   } else if (isbinary) {
135     ierr = DMView_DA_Binary(da,viewer);CHKERRQ(ierr);
136 #if defined(PETSC_HAVE_MATLAB_ENGINE)
137   } else if (ismatlab) {
138     ierr = DMView_DA_Matlab(da,viewer);CHKERRQ(ierr);
139 #endif
140   }
141   PetscFunctionReturn(0);
142 }
143 
144 /*
145       M is number of grid points
146       m is number of processors
147 
148 */
149 PetscErrorCode  DMDASplitComm2d(MPI_Comm comm,PetscInt M,PetscInt N,PetscInt sw,MPI_Comm *outcomm)
150 {
151   PetscErrorCode ierr;
152   PetscInt       m,n = 0,x = 0,y = 0;
153   PetscMPIInt    size,csize,rank;
154 
155   PetscFunctionBegin;
156   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
157   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
158 
159   csize = 4*size;
160   do {
161     if (csize % 4) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Cannot split communicator of size %d tried %d %D %D",size,csize,x,y);
162     csize = csize/4;
163 
164     m = (PetscInt)(0.5 + PetscSqrtReal(((PetscReal)M)*((PetscReal)csize)/((PetscReal)N)));
165     if (!m) m = 1;
166     while (m > 0) {
167       n = csize/m;
168       if (m*n == csize) break;
169       m--;
170     }
171     if (M > N && m < n) {PetscInt _m = m; m = n; n = _m;}
172 
173     x = M/m + ((M % m) > ((csize-1) % m));
174     y = (N + (csize-1)/m)/n;
175   } while ((x < 4 || y < 4) && csize > 1);
176   if (size != csize) {
177     MPI_Group   entire_group,sub_group;
178     PetscMPIInt i,*groupies;
179 
180     ierr = MPI_Comm_group(comm,&entire_group);CHKERRQ(ierr);
181     ierr = PetscMalloc1(csize,&groupies);CHKERRQ(ierr);
182     for (i=0; i<csize; i++) {
183       groupies[i] = (rank/csize)*csize + i;
184     }
185     ierr = MPI_Group_incl(entire_group,csize,groupies,&sub_group);CHKERRQ(ierr);
186     ierr = PetscFree(groupies);CHKERRQ(ierr);
187     ierr = MPI_Comm_create(comm,sub_group,outcomm);CHKERRQ(ierr);
188     ierr = MPI_Group_free(&entire_group);CHKERRQ(ierr);
189     ierr = MPI_Group_free(&sub_group);CHKERRQ(ierr);
190     ierr = PetscInfo1(0,"DMDASplitComm2d:Creating redundant coarse problems of size %d\n",csize);CHKERRQ(ierr);
191   } else {
192     *outcomm = comm;
193   }
194   PetscFunctionReturn(0);
195 }
196 
197 #if defined(new)
198 /*
199   DMDAGetDiagonal_MFFD - Gets the diagonal for a matrix free matrix where local
200     function lives on a DMDA
201 
202         y ~= (F(u + ha) - F(u))/h,
203   where F = nonlinear function, as set by SNESSetFunction()
204         u = current iterate
205         h = difference interval
206 */
207 PetscErrorCode DMDAGetDiagonal_MFFD(DM da,Vec U,Vec a)
208 {
209   PetscScalar    h,*aa,*ww,v;
210   PetscReal      epsilon = PETSC_SQRT_MACHINE_EPSILON,umin = 100.0*PETSC_SQRT_MACHINE_EPSILON;
211   PetscErrorCode ierr;
212   PetscInt       gI,nI;
213   MatStencil     stencil;
214   DMDALocalInfo  info;
215 
216   PetscFunctionBegin;
217   ierr = (*ctx->func)(0,U,a,ctx->funcctx);CHKERRQ(ierr);
218   ierr = (*ctx->funcisetbase)(U,ctx->funcctx);CHKERRQ(ierr);
219 
220   ierr = VecGetArray(U,&ww);CHKERRQ(ierr);
221   ierr = VecGetArray(a,&aa);CHKERRQ(ierr);
222 
223   nI = 0;
224   h  = ww[gI];
225   if (h == 0.0) h = 1.0;
226   if (PetscAbsScalar(h) < umin && PetscRealPart(h) >= 0.0) h = umin;
227   else if (PetscRealPart(h) < 0.0 && PetscAbsScalar(h) < umin) h = -umin;
228   h *= epsilon;
229 
230   ww[gI] += h;
231   ierr    = (*ctx->funci)(i,w,&v,ctx->funcctx);CHKERRQ(ierr);
232   aa[nI]  = (v - aa[nI])/h;
233   ww[gI] -= h;
234   nI++;
235 
236   ierr = VecRestoreArray(U,&ww);CHKERRQ(ierr);
237   ierr = VecRestoreArray(a,&aa);CHKERRQ(ierr);
238   PetscFunctionReturn(0);
239 }
240 #endif
241 
242 PetscErrorCode  DMSetUp_DA_2D(DM da)
243 {
244   DM_DA            *dd = (DM_DA*)da->data;
245   const PetscInt   M            = dd->M;
246   const PetscInt   N            = dd->N;
247   PetscInt         m            = dd->m;
248   PetscInt         n            = dd->n;
249   const PetscInt   dof          = dd->w;
250   const PetscInt   s            = dd->s;
251   DMBoundaryType   bx           = dd->bx;
252   DMBoundaryType   by           = dd->by;
253   DMDAStencilType  stencil_type = dd->stencil_type;
254   PetscInt         *lx          = dd->lx;
255   PetscInt         *ly          = dd->ly;
256   MPI_Comm         comm;
257   PetscMPIInt      rank,size;
258   PetscInt         xs,xe,ys,ye,x,y,Xs,Xe,Ys,Ye,IXs,IXe,IYs,IYe;
259   PetscInt         up,down,left,right,i,n0,n1,n2,n3,n5,n6,n7,n8,*idx,nn;
260   PetscInt         xbase,*bases,*ldims,j,x_t,y_t,s_t,base,count;
261   PetscInt         s_x,s_y; /* s proportionalized to w */
262   PetscInt         sn0 = 0,sn2 = 0,sn6 = 0,sn8 = 0;
263   Vec              local,global;
264   VecScatter       gtol;
265   IS               to,from;
266   PetscErrorCode   ierr;
267 
268   PetscFunctionBegin;
269   if (stencil_type == DMDA_STENCIL_BOX && (bx == DM_BOUNDARY_MIRROR || by == DM_BOUNDARY_MIRROR)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Mirror boundary and box stencil");
270   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
271 #if !defined(PETSC_USE_64BIT_INDICES)
272   if (((PetscInt64) M)*((PetscInt64) N)*((PetscInt64) dof) > (PetscInt64) PETSC_MPI_INT_MAX) SETERRQ3(comm,PETSC_ERR_INT_OVERFLOW,"Mesh of %D by %D by %D (dof) is too large for 32 bit indices",M,N,dof);
273 #endif
274 
275   if (dof < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Must have 1 or more degrees of freedom per node: %D",dof);
276   if (s < 0) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Stencil width cannot be negative: %D",s);
277 
278   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
279   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
280 
281   dd->p = 1;
282   if (m != PETSC_DECIDE) {
283     if (m < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Non-positive number of processors in X direction: %D",m);
284     else if (m > size) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Too many processors in X direction: %D %d",m,size);
285   }
286   if (n != PETSC_DECIDE) {
287     if (n < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Non-positive number of processors in Y direction: %D",n);
288     else if (n > size) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Too many processors in Y direction: %D %d",n,size);
289   }
290 
291   if (m == PETSC_DECIDE || n == PETSC_DECIDE) {
292     if (n != PETSC_DECIDE) {
293       m = size/n;
294     } else if (m != PETSC_DECIDE) {
295       n = size/m;
296     } else {
297       /* try for squarish distribution */
298       m = (PetscInt)(0.5 + PetscSqrtReal(((PetscReal)M)*((PetscReal)size)/((PetscReal)N)));
299       if (!m) m = 1;
300       while (m > 0) {
301         n = size/m;
302         if (m*n == size) break;
303         m--;
304       }
305       if (M > N && m < n) {PetscInt _m = m; m = n; n = _m;}
306     }
307     if (m*n != size) SETERRQ(comm,PETSC_ERR_PLIB,"Unable to create partition, check the size of the communicator and input m and n ");
308   } else if (m*n != size) SETERRQ(comm,PETSC_ERR_ARG_OUTOFRANGE,"Given Bad partition");
309 
310   if (M < m) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Partition in x direction is too fine! %D %D",M,m);
311   if (N < n) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Partition in y direction is too fine! %D %D",N,n);
312 
313   /*
314      Determine locally owned region
315      xs is the first local node number, x is the number of local nodes
316   */
317   if (!lx) {
318     ierr = PetscMalloc1(m, &dd->lx);CHKERRQ(ierr);
319     lx   = dd->lx;
320     for (i=0; i<m; i++) {
321       lx[i] = M/m + ((M % m) > i);
322     }
323   }
324   x  = lx[rank % m];
325   xs = 0;
326   for (i=0; i<(rank % m); i++) {
327     xs += lx[i];
328   }
329 #if defined(PETSC_USE_DEBUG)
330   left = xs;
331   for (i=(rank % m); i<m; i++) {
332     left += lx[i];
333   }
334   if (left != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Sum of lx across processors not equal to M: %D %D",left,M);
335 #endif
336 
337   /*
338      Determine locally owned region
339      ys is the first local node number, y is the number of local nodes
340   */
341   if (!ly) {
342     ierr = PetscMalloc1(n, &dd->ly);CHKERRQ(ierr);
343     ly   = dd->ly;
344     for (i=0; i<n; i++) {
345       ly[i] = N/n + ((N % n) > i);
346     }
347   }
348   y  = ly[rank/m];
349   ys = 0;
350   for (i=0; i<(rank/m); i++) {
351     ys += ly[i];
352   }
353 #if defined(PETSC_USE_DEBUG)
354   left = ys;
355   for (i=(rank/m); i<n; i++) {
356     left += ly[i];
357   }
358   if (left != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Sum of ly across processors not equal to N: %D %D",left,N);
359 #endif
360 
361   /*
362    check if the scatter requires more than one process neighbor or wraps around
363    the domain more than once
364   */
365   if ((x < s) && ((m > 1) || (bx == DM_BOUNDARY_PERIODIC))) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local x-width of domain x %D is smaller than stencil width s %D",x,s);
366   if ((y < s) && ((n > 1) || (by == DM_BOUNDARY_PERIODIC))) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local y-width of domain y %D is smaller than stencil width s %D",y,s);
367   xe = xs + x;
368   ye = ys + y;
369 
370   /* determine ghost region (Xs) and region scattered into (IXs)  */
371   if (xs-s > 0) {
372     Xs = xs - s; IXs = xs - s;
373   } else {
374     if (bx) {
375       Xs = xs - s;
376     } else {
377       Xs = 0;
378     }
379     IXs = 0;
380   }
381   if (xe+s <= M) {
382     Xe = xe + s; IXe = xe + s;
383   } else {
384     if (bx) {
385       Xs = xs - s; Xe = xe + s;
386     } else {
387       Xe = M;
388     }
389     IXe = M;
390   }
391 
392   if (bx == DM_BOUNDARY_PERIODIC || bx == DM_BOUNDARY_MIRROR) {
393     IXs = xs - s;
394     IXe = xe + s;
395     Xs  = xs - s;
396     Xe  = xe + s;
397   }
398 
399   if (ys-s > 0) {
400     Ys = ys - s; IYs = ys - s;
401   } else {
402     if (by) {
403       Ys = ys - s;
404     } else {
405       Ys = 0;
406     }
407     IYs = 0;
408   }
409   if (ye+s <= N) {
410     Ye = ye + s; IYe = ye + s;
411   } else {
412     if (by) {
413       Ye = ye + s;
414     } else {
415       Ye = N;
416     }
417     IYe = N;
418   }
419 
420   if (by == DM_BOUNDARY_PERIODIC || by == DM_BOUNDARY_MIRROR) {
421     IYs = ys - s;
422     IYe = ye + s;
423     Ys  = ys - s;
424     Ye  = ye + s;
425   }
426 
427   /* stencil length in each direction */
428   s_x = s;
429   s_y = s;
430 
431   /* determine starting point of each processor */
432   nn       = x*y;
433   ierr     = PetscMalloc2(size+1,&bases,size,&ldims);CHKERRQ(ierr);
434   ierr     = MPI_Allgather(&nn,1,MPIU_INT,ldims,1,MPIU_INT,comm);CHKERRQ(ierr);
435   bases[0] = 0;
436   for (i=1; i<=size; i++) {
437     bases[i] = ldims[i-1];
438   }
439   for (i=1; i<=size; i++) {
440     bases[i] += bases[i-1];
441   }
442   base = bases[rank]*dof;
443 
444   /* allocate the base parallel and sequential vectors */
445   dd->Nlocal = x*y*dof;
446   ierr       = VecCreateMPIWithArray(comm,dof,dd->Nlocal,PETSC_DECIDE,NULL,&global);CHKERRQ(ierr);
447   dd->nlocal = (Xe-Xs)*(Ye-Ys)*dof;
448   ierr       = VecCreateSeqWithArray(PETSC_COMM_SELF,dof,dd->nlocal,NULL,&local);CHKERRQ(ierr);
449 
450   /* generate appropriate vector scatters */
451   /* local to global inserts non-ghost point region into global */
452   ierr  = PetscMalloc1((IXe-IXs)*(IYe-IYs),&idx);CHKERRQ(ierr);
453   left  = xs - Xs; right = left + x;
454   down  = ys - Ys; up = down + y;
455   count = 0;
456   for (i=down; i<up; i++) {
457     for (j=left; j<right; j++) {
458       idx[count++] = i*(Xe-Xs) + j;
459     }
460   }
461 
462   /* global to local must include ghost points within the domain,
463      but not ghost points outside the domain that aren't periodic */
464   if (stencil_type == DMDA_STENCIL_BOX) {
465     left  = IXs - Xs; right = left + (IXe-IXs);
466     down  = IYs - Ys; up = down + (IYe-IYs);
467     count = 0;
468     for (i=down; i<up; i++) {
469       for (j=left; j<right; j++) {
470         idx[count++] = j + i*(Xe-Xs);
471       }
472     }
473     ierr = ISCreateBlock(comm,dof,count,idx,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);
474 
475   } else {
476     /* must drop into cross shape region */
477     /*       ---------|
478             |  top    |
479          |---         ---| up
480          |   middle      |
481          |               |
482          ----         ---- down
483             | bottom  |
484             -----------
485          Xs xs        xe Xe */
486     left  = xs - Xs; right = left + x;
487     down  = ys - Ys; up = down + y;
488     count = 0;
489     /* bottom */
490     for (i=(IYs-Ys); i<down; i++) {
491       for (j=left; j<right; j++) {
492         idx[count++] = j + i*(Xe-Xs);
493       }
494     }
495     /* middle */
496     for (i=down; i<up; i++) {
497       for (j=(IXs-Xs); j<(IXe-Xs); j++) {
498         idx[count++] = j + i*(Xe-Xs);
499       }
500     }
501     /* top */
502     for (i=up; i<up+IYe-ye; i++) {
503       for (j=left; j<right; j++) {
504         idx[count++] = j + i*(Xe-Xs);
505       }
506     }
507     ierr = ISCreateBlock(comm,dof,count,idx,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);
508   }
509 
510 
511   /* determine who lies on each side of us stored in    n6 n7 n8
512                                                         n3    n5
513                                                         n0 n1 n2
514   */
515 
516   /* Assume the Non-Periodic Case */
517   n1 = rank - m;
518   if (rank % m) {
519     n0 = n1 - 1;
520   } else {
521     n0 = -1;
522   }
523   if ((rank+1) % m) {
524     n2 = n1 + 1;
525     n5 = rank + 1;
526     n8 = rank + m + 1; if (n8 >= m*n) n8 = -1;
527   } else {
528     n2 = -1; n5 = -1; n8 = -1;
529   }
530   if (rank % m) {
531     n3 = rank - 1;
532     n6 = n3 + m; if (n6 >= m*n) n6 = -1;
533   } else {
534     n3 = -1; n6 = -1;
535   }
536   n7 = rank + m; if (n7 >= m*n) n7 = -1;
537 
538   if (bx == DM_BOUNDARY_PERIODIC && by == DM_BOUNDARY_PERIODIC) {
539     /* Modify for Periodic Cases */
540     /* Handle all four corners */
541     if ((n6 < 0) && (n7 < 0) && (n3 < 0)) n6 = m-1;
542     if ((n8 < 0) && (n7 < 0) && (n5 < 0)) n8 = 0;
543     if ((n2 < 0) && (n5 < 0) && (n1 < 0)) n2 = size-m;
544     if ((n0 < 0) && (n3 < 0) && (n1 < 0)) n0 = size-1;
545 
546     /* Handle Top and Bottom Sides */
547     if (n1 < 0) n1 = rank + m * (n-1);
548     if (n7 < 0) n7 = rank - m * (n-1);
549     if ((n3 >= 0) && (n0 < 0)) n0 = size - m + rank - 1;
550     if ((n3 >= 0) && (n6 < 0)) n6 = (rank%m)-1;
551     if ((n5 >= 0) && (n2 < 0)) n2 = size - m + rank + 1;
552     if ((n5 >= 0) && (n8 < 0)) n8 = (rank%m)+1;
553 
554     /* Handle Left and Right Sides */
555     if (n3 < 0) n3 = rank + (m-1);
556     if (n5 < 0) n5 = rank - (m-1);
557     if ((n1 >= 0) && (n0 < 0)) n0 = rank-1;
558     if ((n1 >= 0) && (n2 < 0)) n2 = rank-2*m+1;
559     if ((n7 >= 0) && (n6 < 0)) n6 = rank+2*m-1;
560     if ((n7 >= 0) && (n8 < 0)) n8 = rank+1;
561   } else if (by == DM_BOUNDARY_PERIODIC) {  /* Handle Top and Bottom Sides */
562     if (n1 < 0) n1 = rank + m * (n-1);
563     if (n7 < 0) n7 = rank - m * (n-1);
564     if ((n3 >= 0) && (n0 < 0)) n0 = size - m + rank - 1;
565     if ((n3 >= 0) && (n6 < 0)) n6 = (rank%m)-1;
566     if ((n5 >= 0) && (n2 < 0)) n2 = size - m + rank + 1;
567     if ((n5 >= 0) && (n8 < 0)) n8 = (rank%m)+1;
568   } else if (bx == DM_BOUNDARY_PERIODIC) { /* Handle Left and Right Sides */
569     if (n3 < 0) n3 = rank + (m-1);
570     if (n5 < 0) n5 = rank - (m-1);
571     if ((n1 >= 0) && (n0 < 0)) n0 = rank-1;
572     if ((n1 >= 0) && (n2 < 0)) n2 = rank-2*m+1;
573     if ((n7 >= 0) && (n6 < 0)) n6 = rank+2*m-1;
574     if ((n7 >= 0) && (n8 < 0)) n8 = rank+1;
575   }
576 
577   ierr = PetscMalloc1(9,&dd->neighbors);CHKERRQ(ierr);
578 
579   dd->neighbors[0] = n0;
580   dd->neighbors[1] = n1;
581   dd->neighbors[2] = n2;
582   dd->neighbors[3] = n3;
583   dd->neighbors[4] = rank;
584   dd->neighbors[5] = n5;
585   dd->neighbors[6] = n6;
586   dd->neighbors[7] = n7;
587   dd->neighbors[8] = n8;
588 
589   if (stencil_type == DMDA_STENCIL_STAR) {
590     /* save corner processor numbers */
591     sn0 = n0; sn2 = n2; sn6 = n6; sn8 = n8;
592     n0  = n2 = n6 = n8 = -1;
593   }
594 
595   ierr = PetscMalloc1((Xe-Xs)*(Ye-Ys),&idx);CHKERRQ(ierr);
596 
597   nn = 0;
598   xbase = bases[rank];
599   for (i=1; i<=s_y; i++) {
600     if (n0 >= 0) { /* left below */
601       x_t = lx[n0 % m];
602       y_t = ly[(n0/m)];
603       s_t = bases[n0] + x_t*y_t - (s_y-i)*x_t - s_x;
604       for (j=0; j<s_x; j++) idx[nn++] = s_t++;
605     }
606 
607     if (n1 >= 0) { /* directly below */
608       x_t = x;
609       y_t = ly[(n1/m)];
610       s_t = bases[n1] + x_t*y_t - (s_y+1-i)*x_t;
611       for (j=0; j<x_t; j++) idx[nn++] = s_t++;
612     } else if (by == DM_BOUNDARY_MIRROR) {
613       for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(s_y - i + 1)  + j;
614     }
615 
616     if (n2 >= 0) { /* right below */
617       x_t = lx[n2 % m];
618       y_t = ly[(n2/m)];
619       s_t = bases[n2] + x_t*y_t - (s_y+1-i)*x_t;
620       for (j=0; j<s_x; j++) idx[nn++] = s_t++;
621     }
622   }
623 
624   for (i=0; i<y; i++) {
625     if (n3 >= 0) { /* directly left */
626       x_t = lx[n3 % m];
627       /* y_t = y; */
628       s_t = bases[n3] + (i+1)*x_t - s_x;
629       for (j=0; j<s_x; j++) idx[nn++] = s_t++;
630     } else if (bx == DM_BOUNDARY_MIRROR) {
631       for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*i + s_x - j;
632     }
633 
634     for (j=0; j<x; j++) idx[nn++] = xbase++; /* interior */
635 
636     if (n5 >= 0) { /* directly right */
637       x_t = lx[n5 % m];
638       /* y_t = y; */
639       s_t = bases[n5] + (i)*x_t;
640       for (j=0; j<s_x; j++) idx[nn++] = s_t++;
641     } else if (bx == DM_BOUNDARY_MIRROR) {
642       for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*(i + 1) - 2 - j;
643     }
644   }
645 
646   for (i=1; i<=s_y; i++) {
647     if (n6 >= 0) { /* left above */
648       x_t = lx[n6 % m];
649       /* y_t = ly[(n6/m)]; */
650       s_t = bases[n6] + (i)*x_t - s_x;
651       for (j=0; j<s_x; j++) idx[nn++] = s_t++;
652     }
653 
654     if (n7 >= 0) { /* directly above */
655       x_t = x;
656       /* y_t = ly[(n7/m)]; */
657       s_t = bases[n7] + (i-1)*x_t;
658       for (j=0; j<x_t; j++) idx[nn++] = s_t++;
659     } else if (by == DM_BOUNDARY_MIRROR) {
660       for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(y - i - 1)  + j;
661     }
662 
663     if (n8 >= 0) { /* right above */
664       x_t = lx[n8 % m];
665       /* y_t = ly[(n8/m)]; */
666       s_t = bases[n8] + (i-1)*x_t;
667       for (j=0; j<s_x; j++) idx[nn++] = s_t++;
668     }
669   }
670 
671   ierr = ISCreateBlock(comm,dof,nn,idx,PETSC_USE_POINTER,&from);CHKERRQ(ierr);
672   ierr = VecScatterCreate(global,from,local,to,&gtol);CHKERRQ(ierr);
673   ierr = PetscLogObjectParent((PetscObject)da,(PetscObject)gtol);CHKERRQ(ierr);
674   ierr = ISDestroy(&to);CHKERRQ(ierr);
675   ierr = ISDestroy(&from);CHKERRQ(ierr);
676 
677   if (stencil_type == DMDA_STENCIL_STAR) {
678     n0 = sn0; n2 = sn2; n6 = sn6; n8 = sn8;
679   }
680 
681   if (((stencil_type == DMDA_STENCIL_STAR)  || (bx && bx != DM_BOUNDARY_PERIODIC) || (by && by != DM_BOUNDARY_PERIODIC))) {
682     /*
683         Recompute the local to global mappings, this time keeping the
684       information about the cross corner processor numbers and any ghosted
685       but not periodic indices.
686     */
687     nn    = 0;
688     xbase = bases[rank];
689     for (i=1; i<=s_y; i++) {
690       if (n0 >= 0) { /* left below */
691         x_t = lx[n0 % m];
692         y_t = ly[(n0/m)];
693         s_t = bases[n0] + x_t*y_t - (s_y-i)*x_t - s_x;
694         for (j=0; j<s_x; j++) idx[nn++] = s_t++;
695       } else if (xs-Xs > 0 && ys-Ys > 0) {
696         for (j=0; j<s_x; j++) idx[nn++] = -1;
697       }
698       if (n1 >= 0) { /* directly below */
699         x_t = x;
700         y_t = ly[(n1/m)];
701         s_t = bases[n1] + x_t*y_t - (s_y+1-i)*x_t;
702         for (j=0; j<x_t; j++) idx[nn++] = s_t++;
703       } else if (ys-Ys > 0) {
704         if (by == DM_BOUNDARY_MIRROR) {
705           for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(s_y - i + 1)  + j;
706         } else {
707           for (j=0; j<x; j++) idx[nn++] = -1;
708         }
709       }
710       if (n2 >= 0) { /* right below */
711         x_t = lx[n2 % m];
712         y_t = ly[(n2/m)];
713         s_t = bases[n2] + x_t*y_t - (s_y+1-i)*x_t;
714         for (j=0; j<s_x; j++) idx[nn++] = s_t++;
715       } else if (Xe-xe> 0 && ys-Ys > 0) {
716         for (j=0; j<s_x; j++) idx[nn++] = -1;
717       }
718     }
719 
720     for (i=0; i<y; i++) {
721       if (n3 >= 0) { /* directly left */
722         x_t = lx[n3 % m];
723         /* y_t = y; */
724         s_t = bases[n3] + (i+1)*x_t - s_x;
725         for (j=0; j<s_x; j++) idx[nn++] = s_t++;
726       } else if (xs-Xs > 0) {
727         if (bx == DM_BOUNDARY_MIRROR) {
728           for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*i + s_x - j;
729         } else {
730           for (j=0; j<s_x; j++) idx[nn++] = -1;
731         }
732       }
733 
734       for (j=0; j<x; j++) idx[nn++] = xbase++; /* interior */
735 
736       if (n5 >= 0) { /* directly right */
737         x_t = lx[n5 % m];
738         /* y_t = y; */
739         s_t = bases[n5] + (i)*x_t;
740         for (j=0; j<s_x; j++) idx[nn++] = s_t++;
741       } else if (Xe-xe > 0) {
742         if (bx == DM_BOUNDARY_MIRROR) {
743           for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*(i + 1) - 2 - j;
744         } else {
745           for (j=0; j<s_x; j++) idx[nn++] = -1;
746         }
747       }
748     }
749 
750     for (i=1; i<=s_y; i++) {
751       if (n6 >= 0) { /* left above */
752         x_t = lx[n6 % m];
753         /* y_t = ly[(n6/m)]; */
754         s_t = bases[n6] + (i)*x_t - s_x;
755         for (j=0; j<s_x; j++) idx[nn++] = s_t++;
756       } else if (xs-Xs > 0 && Ye-ye > 0) {
757         for (j=0; j<s_x; j++) idx[nn++] = -1;
758       }
759       if (n7 >= 0) { /* directly above */
760         x_t = x;
761         /* y_t = ly[(n7/m)]; */
762         s_t = bases[n7] + (i-1)*x_t;
763         for (j=0; j<x_t; j++) idx[nn++] = s_t++;
764       } else if (Ye-ye > 0) {
765         if (by == DM_BOUNDARY_MIRROR) {
766           for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(y - i - 1)  + j;
767         } else {
768           for (j=0; j<x; j++) idx[nn++] = -1;
769         }
770       }
771       if (n8 >= 0) { /* right above */
772         x_t = lx[n8 % m];
773         /* y_t = ly[(n8/m)]; */
774         s_t = bases[n8] + (i-1)*x_t;
775         for (j=0; j<s_x; j++) idx[nn++] = s_t++;
776       } else if (Xe-xe > 0 && Ye-ye > 0) {
777         for (j=0; j<s_x; j++) idx[nn++] = -1;
778       }
779     }
780   }
781   /*
782      Set the local to global ordering in the global vector, this allows use
783      of VecSetValuesLocal().
784   */
785   ierr = ISLocalToGlobalMappingCreate(comm,dof,nn,idx,PETSC_OWN_POINTER,&da->ltogmap);CHKERRQ(ierr);
786   ierr = PetscLogObjectParent((PetscObject)da,(PetscObject)da->ltogmap);CHKERRQ(ierr);
787 
788   ierr  = PetscFree2(bases,ldims);CHKERRQ(ierr);
789   dd->m = m;  dd->n  = n;
790   /* note petsc expects xs/xe/Xs/Xe to be multiplied by #dofs in many places */
791   dd->xs = xs*dof; dd->xe = xe*dof; dd->ys = ys; dd->ye = ye; dd->zs = 0; dd->ze = 1;
792   dd->Xs = Xs*dof; dd->Xe = Xe*dof; dd->Ys = Ys; dd->Ye = Ye; dd->Zs = 0; dd->Ze = 1;
793 
794   ierr = VecDestroy(&local);CHKERRQ(ierr);
795   ierr = VecDestroy(&global);CHKERRQ(ierr);
796 
797   dd->gtol      = gtol;
798   dd->base      = base;
799   da->ops->view = DMView_DA_2d;
800   dd->ltol      = NULL;
801   dd->ao        = NULL;
802   PetscFunctionReturn(0);
803 }
804 
805 /*@C
806    DMDACreate2d -  Creates an object that will manage the communication of  two-dimensional
807    regular array data that is distributed across some processors.
808 
809    Collective on MPI_Comm
810 
811    Input Parameters:
812 +  comm - MPI communicator
813 .  bx,by - type of ghost nodes the array have.
814          Use one of DM_BOUNDARY_NONE, DM_BOUNDARY_GHOSTED, DM_BOUNDARY_PERIODIC.
815 .  stencil_type - stencil type.  Use either DMDA_STENCIL_BOX or DMDA_STENCIL_STAR.
816 .  M,N - global dimension in each direction of the array
817 .  m,n - corresponding number of processors in each dimension
818          (or PETSC_DECIDE to have calculated)
819 .  dof - number of degrees of freedom per node
820 .  s - stencil width
821 -  lx, ly - arrays containing the number of nodes in each cell along
822            the x and y coordinates, or NULL. If non-null, these
823            must be of length as m and n, and the corresponding
824            m and n cannot be PETSC_DECIDE. The sum of the lx[] entries
825            must be M, and the sum of the ly[] entries must be N.
826 
827    Output Parameter:
828 .  da - the resulting distributed array object
829 
830    Options Database Key:
831 +  -dm_view - Calls DMView() at the conclusion of DMDACreate2d()
832 .  -da_grid_x <nx> - number of grid points in x direction, if M < 0
833 .  -da_grid_y <ny> - number of grid points in y direction, if N < 0
834 .  -da_processors_x <nx> - number of processors in x direction
835 .  -da_processors_y <ny> - number of processors in y direction
836 .  -da_refine_x <rx> - refinement ratio in x direction
837 .  -da_refine_y <ry> - refinement ratio in y direction
838 -  -da_refine <n> - refine the DMDA n times before creating, if M or N < 0
839 
840 
841    Level: beginner
842 
843    Notes:
844    The stencil type DMDA_STENCIL_STAR with width 1 corresponds to the
845    standard 5-pt stencil, while DMDA_STENCIL_BOX with width 1 denotes
846    the standard 9-pt stencil.
847 
848    The array data itself is NOT stored in the DMDA, it is stored in Vec objects;
849    The appropriate vector objects can be obtained with calls to DMCreateGlobalVector()
850    and DMCreateLocalVector() and calls to VecDuplicate() if more are needed.
851 
852    You must call DMSetUp() after this call before using this DM.
853 
854    If you wish to use the options database to change values in the DMDA call DMSetFromOptions() after this call
855    but before DMSetUp().
856 
857 .keywords: distributed array, create, two-dimensional
858 
859 .seealso: DMDestroy(), DMView(), DMDACreate1d(), DMDACreate3d(), DMGlobalToLocalBegin(), DMDAGetRefinementFactor(),
860           DMGlobalToLocalEnd(), DMLocalToGlobalBegin(), DMLocalToLocalBegin(), DMLocalToLocalEnd(), DMDASetRefinementFactor(),
861           DMDAGetInfo(), DMCreateGlobalVector(), DMCreateLocalVector(), DMDACreateNaturalVector(), DMLoad(), DMDAGetOwnershipRanges()
862 
863 @*/
864 
865 PetscErrorCode  DMDACreate2d(MPI_Comm comm,DMBoundaryType bx,DMBoundaryType by,DMDAStencilType stencil_type,
866                           PetscInt M,PetscInt N,PetscInt m,PetscInt n,PetscInt dof,PetscInt s,const PetscInt lx[],const PetscInt ly[],DM *da)
867 {
868   PetscErrorCode ierr;
869 
870   PetscFunctionBegin;
871   ierr = DMDACreate(comm, da);CHKERRQ(ierr);
872   ierr = DMSetDimension(*da, 2);CHKERRQ(ierr);
873   ierr = DMDASetSizes(*da, M, N, 1);CHKERRQ(ierr);
874   ierr = DMDASetNumProcs(*da, m, n, PETSC_DECIDE);CHKERRQ(ierr);
875   ierr = DMDASetBoundaryType(*da, bx, by, DM_BOUNDARY_NONE);CHKERRQ(ierr);
876   ierr = DMDASetDof(*da, dof);CHKERRQ(ierr);
877   ierr = DMDASetStencilType(*da, stencil_type);CHKERRQ(ierr);
878   ierr = DMDASetStencilWidth(*da, s);CHKERRQ(ierr);
879   ierr = DMDASetOwnershipRanges(*da, lx, ly, NULL);CHKERRQ(ierr);
880   PetscFunctionReturn(0);
881 }
882