xref: /petsc/src/dm/impls/da/da2.c (revision 4d1a973fe17c9234e2cf908ebb478400f033ef22)
1 
2 #include <petsc/private/dmdaimpl.h>    /*I   "petscdmda.h"   I*/
3 #include <petscdraw.h>
4 
5 static PetscErrorCode DMView_DA_2d(DM da,PetscViewer viewer)
6 {
7   PetscErrorCode ierr;
8   PetscMPIInt    rank;
9   PetscBool      iascii,isdraw,isglvis,isbinary;
10   DM_DA          *dd = (DM_DA*)da->data;
11 #if defined(PETSC_HAVE_MATLAB_ENGINE)
12   PetscBool ismatlab;
13 #endif
14 
15   PetscFunctionBegin;
16   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);CHKERRQ(ierr);
17 
18   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
19   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
20   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERGLVIS,&isglvis);CHKERRQ(ierr);
21   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
22 #if defined(PETSC_HAVE_MATLAB_ENGINE)
23   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERMATLAB,&ismatlab);CHKERRQ(ierr);
24 #endif
25   if (iascii) {
26     PetscViewerFormat format;
27 
28     ierr = PetscViewerGetFormat(viewer, &format);CHKERRQ(ierr);
29     if (format == PETSC_VIEWER_LOAD_BALANCE) {
30       PetscInt      i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal;
31       DMDALocalInfo info;
32       PetscMPIInt   size;
33       ierr = MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);CHKERRQ(ierr);
34       ierr = DMDAGetLocalInfo(da,&info);CHKERRQ(ierr);
35       nzlocal = info.xm*info.ym;
36       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
37       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)da));CHKERRQ(ierr);
38       for (i=0; i<(PetscInt)size; i++) {
39         nmax = PetscMax(nmax,nz[i]);
40         nmin = PetscMin(nmin,nz[i]);
41         navg += nz[i];
42       }
43       ierr = PetscFree(nz);CHKERRQ(ierr);
44       navg = navg/size;
45       ierr = PetscViewerASCIIPrintf(viewer,"  Load Balance - Grid Points: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
46       PetscFunctionReturn(0);
47     }
48     if (format != PETSC_VIEWER_ASCII_VTK && format != PETSC_VIEWER_ASCII_VTK_CELL && format != PETSC_VIEWER_ASCII_GLVIS) {
49       DMDALocalInfo info;
50       ierr = DMDAGetLocalInfo(da,&info);CHKERRQ(ierr);
51       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
52       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Processor [%d] M %D N %D m %D n %D w %D s %D\n",rank,dd->M,dd->N,dd->m,dd->n,dd->w,dd->s);CHKERRQ(ierr);
53       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"X range of indices: %D %D, Y range of indices: %D %D\n",info.xs,info.xs+info.xm,info.ys,info.ys+info.ym);CHKERRQ(ierr);
54       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
55       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
56     } else if (format == PETSC_VIEWER_ASCII_GLVIS) {
57       ierr = DMView_DA_GLVis(da,viewer);CHKERRQ(ierr);
58     } else {
59       ierr = DMView_DA_VTK(da,viewer);CHKERRQ(ierr);
60     }
61   } else if (isdraw) {
62     PetscDraw      draw;
63     double         ymin = -1*dd->s-1,ymax = dd->N+dd->s;
64     double         xmin = -1*dd->s-1,xmax = dd->M+dd->s;
65     double         x,y;
66     PetscInt       base;
67     const PetscInt *idx;
68     char           node[10];
69     PetscBool      isnull;
70 
71     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
72     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
73     if (isnull) PetscFunctionReturn(0);
74 
75     ierr = PetscDrawCheckResizedWindow(draw);CHKERRQ(ierr);
76     ierr = PetscDrawClear(draw);CHKERRQ(ierr);
77     ierr = PetscDrawSetCoordinates(draw,xmin,ymin,xmax,ymax);CHKERRQ(ierr);
78 
79     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
80     /* first processor draw all node lines */
81     if (!rank) {
82       ymin = 0.0; ymax = dd->N - 1;
83       for (xmin=0; xmin<dd->M; xmin++) {
84         ierr = PetscDrawLine(draw,xmin,ymin,xmin,ymax,PETSC_DRAW_BLACK);CHKERRQ(ierr);
85       }
86       xmin = 0.0; xmax = dd->M - 1;
87       for (ymin=0; ymin<dd->N; ymin++) {
88         ierr = PetscDrawLine(draw,xmin,ymin,xmax,ymin,PETSC_DRAW_BLACK);CHKERRQ(ierr);
89       }
90     }
91     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
92     ierr = PetscDrawFlush(draw);CHKERRQ(ierr);
93     ierr = PetscDrawPause(draw);CHKERRQ(ierr);
94 
95     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
96     /* draw my box */
97     xmin = dd->xs/dd->w; xmax =(dd->xe-1)/dd->w; ymin = dd->ys; ymax = dd->ye - 1;
98     ierr = PetscDrawLine(draw,xmin,ymin,xmax,ymin,PETSC_DRAW_RED);CHKERRQ(ierr);
99     ierr = PetscDrawLine(draw,xmin,ymin,xmin,ymax,PETSC_DRAW_RED);CHKERRQ(ierr);
100     ierr = PetscDrawLine(draw,xmin,ymax,xmax,ymax,PETSC_DRAW_RED);CHKERRQ(ierr);
101     ierr = PetscDrawLine(draw,xmax,ymin,xmax,ymax,PETSC_DRAW_RED);CHKERRQ(ierr);
102     /* put in numbers */
103     base = (dd->base)/dd->w;
104     for (y=ymin; y<=ymax; y++) {
105       for (x=xmin; x<=xmax; x++) {
106         ierr = PetscSNPrintf(node,sizeof(node),"%d",(int)base++);CHKERRQ(ierr);
107         ierr = PetscDrawString(draw,x,y,PETSC_DRAW_BLACK,node);CHKERRQ(ierr);
108       }
109     }
110     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
111     ierr = PetscDrawFlush(draw);CHKERRQ(ierr);
112     ierr = PetscDrawPause(draw);CHKERRQ(ierr);
113 
114     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
115     /* overlay ghost numbers, useful for error checking */
116     ierr = ISLocalToGlobalMappingGetBlockIndices(da->ltogmap,&idx);CHKERRQ(ierr);
117     base = 0; xmin = dd->Xs; xmax = dd->Xe; ymin = dd->Ys; ymax = dd->Ye;
118     for (y=ymin; y<ymax; y++) {
119       for (x=xmin; x<xmax; x++) {
120         if ((base % dd->w) == 0) {
121           ierr = PetscSNPrintf(node,sizeof(node),"%d",(int)(idx[base/dd->w]));CHKERRQ(ierr);
122           ierr = PetscDrawString(draw,x/dd->w,y,PETSC_DRAW_BLUE,node);CHKERRQ(ierr);
123         }
124         base++;
125       }
126     }
127     ierr = ISLocalToGlobalMappingRestoreBlockIndices(da->ltogmap,&idx);CHKERRQ(ierr);
128     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
129     ierr = PetscDrawFlush(draw);CHKERRQ(ierr);
130     ierr = PetscDrawPause(draw);CHKERRQ(ierr);
131     ierr = PetscDrawSave(draw);CHKERRQ(ierr);
132   } else if (isglvis) {
133     ierr = DMView_DA_GLVis(da,viewer);CHKERRQ(ierr);
134   } else if (isbinary) {
135     ierr = DMView_DA_Binary(da,viewer);CHKERRQ(ierr);
136 #if defined(PETSC_HAVE_MATLAB_ENGINE)
137   } else if (ismatlab) {
138     ierr = DMView_DA_Matlab(da,viewer);CHKERRQ(ierr);
139 #endif
140   }
141   PetscFunctionReturn(0);
142 }
143 
144 /*
145       M is number of grid points
146       m is number of processors
147 
148 */
149 PetscErrorCode  DMDASplitComm2d(MPI_Comm comm,PetscInt M,PetscInt N,PetscInt sw,MPI_Comm *outcomm)
150 {
151   PetscErrorCode ierr;
152   PetscInt       m,n = 0,x = 0,y = 0;
153   PetscMPIInt    size,csize,rank;
154 
155   PetscFunctionBegin;
156   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
157   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
158 
159   csize = 4*size;
160   do {
161     if (csize % 4) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Cannot split communicator of size %d tried %d %D %D",size,csize,x,y);
162     csize = csize/4;
163 
164     m = (PetscInt)(0.5 + PetscSqrtReal(((PetscReal)M)*((PetscReal)csize)/((PetscReal)N)));
165     if (!m) m = 1;
166     while (m > 0) {
167       n = csize/m;
168       if (m*n == csize) break;
169       m--;
170     }
171     if (M > N && m < n) {PetscInt _m = m; m = n; n = _m;}
172 
173     x = M/m + ((M % m) > ((csize-1) % m));
174     y = (N + (csize-1)/m)/n;
175   } while ((x < 4 || y < 4) && csize > 1);
176   if (size != csize) {
177     MPI_Group   entire_group,sub_group;
178     PetscMPIInt i,*groupies;
179 
180     ierr = MPI_Comm_group(comm,&entire_group);CHKERRQ(ierr);
181     ierr = PetscMalloc1(csize,&groupies);CHKERRQ(ierr);
182     for (i=0; i<csize; i++) {
183       groupies[i] = (rank/csize)*csize + i;
184     }
185     ierr = MPI_Group_incl(entire_group,csize,groupies,&sub_group);CHKERRQ(ierr);
186     ierr = PetscFree(groupies);CHKERRQ(ierr);
187     ierr = MPI_Comm_create(comm,sub_group,outcomm);CHKERRQ(ierr);
188     ierr = MPI_Group_free(&entire_group);CHKERRQ(ierr);
189     ierr = MPI_Group_free(&sub_group);CHKERRQ(ierr);
190     ierr = PetscInfo1(0,"DMDASplitComm2d:Creating redundant coarse problems of size %d\n",csize);CHKERRQ(ierr);
191   } else {
192     *outcomm = comm;
193   }
194   PetscFunctionReturn(0);
195 }
196 
197 #if defined(new)
198 /*
199   DMDAGetDiagonal_MFFD - Gets the diagonal for a matrix free matrix where local
200     function lives on a DMDA
201 
202         y ~= (F(u + ha) - F(u))/h,
203   where F = nonlinear function, as set by SNESSetFunction()
204         u = current iterate
205         h = difference interval
206 */
207 PetscErrorCode DMDAGetDiagonal_MFFD(DM da,Vec U,Vec a)
208 {
209   PetscScalar    h,*aa,*ww,v;
210   PetscReal      epsilon = PETSC_SQRT_MACHINE_EPSILON,umin = 100.0*PETSC_SQRT_MACHINE_EPSILON;
211   PetscErrorCode ierr;
212   PetscInt       gI,nI;
213   MatStencil     stencil;
214   DMDALocalInfo  info;
215 
216   PetscFunctionBegin;
217   ierr = (*ctx->func)(0,U,a,ctx->funcctx);CHKERRQ(ierr);
218   ierr = (*ctx->funcisetbase)(U,ctx->funcctx);CHKERRQ(ierr);
219 
220   ierr = VecGetArray(U,&ww);CHKERRQ(ierr);
221   ierr = VecGetArray(a,&aa);CHKERRQ(ierr);
222 
223   nI = 0;
224   h  = ww[gI];
225   if (h == 0.0) h = 1.0;
226   if (PetscAbsScalar(h) < umin && PetscRealPart(h) >= 0.0) h = umin;
227   else if (PetscRealPart(h) < 0.0 && PetscAbsScalar(h) < umin) h = -umin;
228   h *= epsilon;
229 
230   ww[gI] += h;
231   ierr    = (*ctx->funci)(i,w,&v,ctx->funcctx);CHKERRQ(ierr);
232   aa[nI]  = (v - aa[nI])/h;
233   ww[gI] -= h;
234   nI++;
235 
236   ierr = VecRestoreArray(U,&ww);CHKERRQ(ierr);
237   ierr = VecRestoreArray(a,&aa);CHKERRQ(ierr);
238   PetscFunctionReturn(0);
239 }
240 #endif
241 
242 PetscErrorCode  DMSetUp_DA_2D(DM da)
243 {
244   DM_DA            *dd = (DM_DA*)da->data;
245   const PetscInt   M            = dd->M;
246   const PetscInt   N            = dd->N;
247   PetscInt         m            = dd->m;
248   PetscInt         n            = dd->n;
249   const PetscInt   dof          = dd->w;
250   const PetscInt   s            = dd->s;
251   DMBoundaryType   bx           = dd->bx;
252   DMBoundaryType   by           = dd->by;
253   DMDAStencilType  stencil_type = dd->stencil_type;
254   PetscInt         *lx          = dd->lx;
255   PetscInt         *ly          = dd->ly;
256   MPI_Comm         comm;
257   PetscMPIInt      rank,size;
258   PetscInt         xs,xe,ys,ye,x,y,Xs,Xe,Ys,Ye,IXs,IXe,IYs,IYe;
259   PetscInt         up,down,left,right,i,n0,n1,n2,n3,n5,n6,n7,n8,*idx,nn;
260   PetscInt         xbase,*bases,*ldims,j,x_t,y_t,s_t,base,count;
261   PetscInt         s_x,s_y; /* s proportionalized to w */
262   PetscInt         sn0 = 0,sn2 = 0,sn6 = 0,sn8 = 0;
263   Vec              local,global;
264   VecScatter       gtol;
265   IS               to,from;
266   PetscErrorCode   ierr;
267 
268   PetscFunctionBegin;
269   if (stencil_type == DMDA_STENCIL_BOX && (bx == DM_BOUNDARY_MIRROR || by == DM_BOUNDARY_MIRROR)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Mirror boundary and box stencil");
270   ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
271 #if !defined(PETSC_USE_64BIT_INDICES)
272   if (((PetscInt64) M)*((PetscInt64) N)*((PetscInt64) dof) > (PetscInt64) PETSC_MPI_INT_MAX) SETERRQ3(comm,PETSC_ERR_INT_OVERFLOW,"Mesh of %D by %D by %D (dof) is too large for 32 bit indices",M,N,dof);
273 #endif
274 
275   if (dof < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Must have 1 or more degrees of freedom per node: %D",dof);
276   if (s < 0) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Stencil width cannot be negative: %D",s);
277 
278   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
279   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
280 
281   dd->p = 1;
282   if (m != PETSC_DECIDE) {
283     if (m < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Non-positive number of processors in X direction: %D",m);
284     else if (m > size) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Too many processors in X direction: %D %d",m,size);
285   }
286   if (n != PETSC_DECIDE) {
287     if (n < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Non-positive number of processors in Y direction: %D",n);
288     else if (n > size) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Too many processors in Y direction: %D %d",n,size);
289   }
290 
291   if (m == PETSC_DECIDE || n == PETSC_DECIDE) {
292     if (n != PETSC_DECIDE) {
293       m = size/n;
294     } else if (m != PETSC_DECIDE) {
295       n = size/m;
296     } else {
297       /* try for squarish distribution */
298       m = (PetscInt)(0.5 + PetscSqrtReal(((PetscReal)M)*((PetscReal)size)/((PetscReal)N)));
299       if (!m) m = 1;
300       while (m > 0) {
301         n = size/m;
302         if (m*n == size) break;
303         m--;
304       }
305       if (M > N && m < n) {PetscInt _m = m; m = n; n = _m;}
306     }
307     if (m*n != size) SETERRQ(comm,PETSC_ERR_PLIB,"Unable to create partition, check the size of the communicator and input m and n ");
308   } else if (m*n != size) SETERRQ(comm,PETSC_ERR_ARG_OUTOFRANGE,"Given Bad partition");
309 
310   if (M < m) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Partition in x direction is too fine! %D %D",M,m);
311   if (N < n) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Partition in y direction is too fine! %D %D",N,n);
312 
313   /*
314      Determine locally owned region
315      xs is the first local node number, x is the number of local nodes
316   */
317   if (!lx) {
318     ierr = PetscMalloc1(m, &dd->lx);CHKERRQ(ierr);
319     lx   = dd->lx;
320     for (i=0; i<m; i++) {
321       lx[i] = M/m + ((M % m) > i);
322     }
323   }
324   x  = lx[rank % m];
325   xs = 0;
326   for (i=0; i<(rank % m); i++) {
327     xs += lx[i];
328   }
329 #if defined(PETSC_USE_DEBUG)
330   left = xs;
331   for (i=(rank % m); i<m; i++) {
332     left += lx[i];
333   }
334   if (left != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Sum of lx across processors not equal to M: %D %D",left,M);
335 #endif
336 
337   /*
338      Determine locally owned region
339      ys is the first local node number, y is the number of local nodes
340   */
341   if (!ly) {
342     ierr = PetscMalloc1(n, &dd->ly);CHKERRQ(ierr);
343     ly   = dd->ly;
344     for (i=0; i<n; i++) {
345       ly[i] = N/n + ((N % n) > i);
346     }
347   }
348   y  = ly[rank/m];
349   ys = 0;
350   for (i=0; i<(rank/m); i++) {
351     ys += ly[i];
352   }
353 #if defined(PETSC_USE_DEBUG)
354   left = ys;
355   for (i=(rank/m); i<n; i++) {
356     left += ly[i];
357   }
358   if (left != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Sum of ly across processors not equal to N: %D %D",left,N);
359 #endif
360 
361   /*
362    check if the scatter requires more than one process neighbor or wraps around
363    the domain more than once
364   */
365   if ((x < s) && ((m > 1) || (bx == DM_BOUNDARY_PERIODIC))) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local x-width of domain x %D is smaller than stencil width s %D",x,s);
366   if ((y < s) && ((n > 1) || (by == DM_BOUNDARY_PERIODIC))) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local y-width of domain y %D is smaller than stencil width s %D",y,s);
367   xe = xs + x;
368   ye = ys + y;
369 
370   /* determine ghost region (Xs) and region scattered into (IXs)  */
371   if (xs-s > 0) {
372     Xs = xs - s; IXs = xs - s;
373   } else {
374     if (bx) {
375       Xs = xs - s;
376     } else {
377       Xs = 0;
378     }
379     IXs = 0;
380   }
381   if (xe+s <= M) {
382     Xe = xe + s; IXe = xe + s;
383   } else {
384     if (bx) {
385       Xs = xs - s; Xe = xe + s;
386     } else {
387       Xe = M;
388     }
389     IXe = M;
390   }
391 
392   if (bx == DM_BOUNDARY_PERIODIC || bx == DM_BOUNDARY_MIRROR) {
393     IXs = xs - s;
394     IXe = xe + s;
395     Xs  = xs - s;
396     Xe  = xe + s;
397   }
398 
399   if (ys-s > 0) {
400     Ys = ys - s; IYs = ys - s;
401   } else {
402     if (by) {
403       Ys = ys - s;
404     } else {
405       Ys = 0;
406     }
407     IYs = 0;
408   }
409   if (ye+s <= N) {
410     Ye = ye + s; IYe = ye + s;
411   } else {
412     if (by) {
413       Ye = ye + s;
414     } else {
415       Ye = N;
416     }
417     IYe = N;
418   }
419 
420   if (by == DM_BOUNDARY_PERIODIC || by == DM_BOUNDARY_MIRROR) {
421     IYs = ys - s;
422     IYe = ye + s;
423     Ys  = ys - s;
424     Ye  = ye + s;
425   }
426 
427   /* stencil length in each direction */
428   s_x = s;
429   s_y = s;
430 
431   /* determine starting point of each processor */
432   nn       = x*y;
433   ierr     = PetscMalloc2(size+1,&bases,size,&ldims);CHKERRQ(ierr);
434   ierr     = MPI_Allgather(&nn,1,MPIU_INT,ldims,1,MPIU_INT,comm);CHKERRQ(ierr);
435   bases[0] = 0;
436   for (i=1; i<=size; i++) {
437     bases[i] = ldims[i-1];
438   }
439   for (i=1; i<=size; i++) {
440     bases[i] += bases[i-1];
441   }
442   base = bases[rank]*dof;
443 
444   /* allocate the base parallel and sequential vectors */
445   dd->Nlocal = x*y*dof;
446   ierr       = VecCreateMPIWithArray(comm,dof,dd->Nlocal,PETSC_DECIDE,NULL,&global);CHKERRQ(ierr);
447   dd->nlocal = (Xe-Xs)*(Ye-Ys)*dof;
448   ierr       = VecCreateSeqWithArray(PETSC_COMM_SELF,dof,dd->nlocal,NULL,&local);CHKERRQ(ierr);
449 
450   /* generate global to local vector scatter and local to global mapping*/
451 
452   /* global to local must include ghost points within the domain,
453      but not ghost points outside the domain that aren't periodic */
454   ierr  = PetscMalloc1((IXe-IXs)*(IYe-IYs),&idx);CHKERRQ(ierr);
455   if (stencil_type == DMDA_STENCIL_BOX) {
456     left  = IXs - Xs; right = left + (IXe-IXs);
457     down  = IYs - Ys; up = down + (IYe-IYs);
458     count = 0;
459     for (i=down; i<up; i++) {
460       for (j=left; j<right; j++) {
461         idx[count++] = j + i*(Xe-Xs);
462       }
463     }
464     ierr = ISCreateBlock(comm,dof,count,idx,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);
465 
466   } else {
467     /* must drop into cross shape region */
468     /*       ---------|
469             |  top    |
470          |---         ---| up
471          |   middle      |
472          |               |
473          ----         ---- down
474             | bottom  |
475             -----------
476          Xs xs        xe Xe */
477     left  = xs - Xs; right = left + x;
478     down  = ys - Ys; up = down + y;
479     count = 0;
480     /* bottom */
481     for (i=(IYs-Ys); i<down; i++) {
482       for (j=left; j<right; j++) {
483         idx[count++] = j + i*(Xe-Xs);
484       }
485     }
486     /* middle */
487     for (i=down; i<up; i++) {
488       for (j=(IXs-Xs); j<(IXe-Xs); j++) {
489         idx[count++] = j + i*(Xe-Xs);
490       }
491     }
492     /* top */
493     for (i=up; i<up+IYe-ye; i++) {
494       for (j=left; j<right; j++) {
495         idx[count++] = j + i*(Xe-Xs);
496       }
497     }
498     ierr = ISCreateBlock(comm,dof,count,idx,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);
499   }
500 
501 
502   /* determine who lies on each side of us stored in    n6 n7 n8
503                                                         n3    n5
504                                                         n0 n1 n2
505   */
506 
507   /* Assume the Non-Periodic Case */
508   n1 = rank - m;
509   if (rank % m) {
510     n0 = n1 - 1;
511   } else {
512     n0 = -1;
513   }
514   if ((rank+1) % m) {
515     n2 = n1 + 1;
516     n5 = rank + 1;
517     n8 = rank + m + 1; if (n8 >= m*n) n8 = -1;
518   } else {
519     n2 = -1; n5 = -1; n8 = -1;
520   }
521   if (rank % m) {
522     n3 = rank - 1;
523     n6 = n3 + m; if (n6 >= m*n) n6 = -1;
524   } else {
525     n3 = -1; n6 = -1;
526   }
527   n7 = rank + m; if (n7 >= m*n) n7 = -1;
528 
529   if (bx == DM_BOUNDARY_PERIODIC && by == DM_BOUNDARY_PERIODIC) {
530     /* Modify for Periodic Cases */
531     /* Handle all four corners */
532     if ((n6 < 0) && (n7 < 0) && (n3 < 0)) n6 = m-1;
533     if ((n8 < 0) && (n7 < 0) && (n5 < 0)) n8 = 0;
534     if ((n2 < 0) && (n5 < 0) && (n1 < 0)) n2 = size-m;
535     if ((n0 < 0) && (n3 < 0) && (n1 < 0)) n0 = size-1;
536 
537     /* Handle Top and Bottom Sides */
538     if (n1 < 0) n1 = rank + m * (n-1);
539     if (n7 < 0) n7 = rank - m * (n-1);
540     if ((n3 >= 0) && (n0 < 0)) n0 = size - m + rank - 1;
541     if ((n3 >= 0) && (n6 < 0)) n6 = (rank%m)-1;
542     if ((n5 >= 0) && (n2 < 0)) n2 = size - m + rank + 1;
543     if ((n5 >= 0) && (n8 < 0)) n8 = (rank%m)+1;
544 
545     /* Handle Left and Right Sides */
546     if (n3 < 0) n3 = rank + (m-1);
547     if (n5 < 0) n5 = rank - (m-1);
548     if ((n1 >= 0) && (n0 < 0)) n0 = rank-1;
549     if ((n1 >= 0) && (n2 < 0)) n2 = rank-2*m+1;
550     if ((n7 >= 0) && (n6 < 0)) n6 = rank+2*m-1;
551     if ((n7 >= 0) && (n8 < 0)) n8 = rank+1;
552   } else if (by == DM_BOUNDARY_PERIODIC) {  /* Handle Top and Bottom Sides */
553     if (n1 < 0) n1 = rank + m * (n-1);
554     if (n7 < 0) n7 = rank - m * (n-1);
555     if ((n3 >= 0) && (n0 < 0)) n0 = size - m + rank - 1;
556     if ((n3 >= 0) && (n6 < 0)) n6 = (rank%m)-1;
557     if ((n5 >= 0) && (n2 < 0)) n2 = size - m + rank + 1;
558     if ((n5 >= 0) && (n8 < 0)) n8 = (rank%m)+1;
559   } else if (bx == DM_BOUNDARY_PERIODIC) { /* Handle Left and Right Sides */
560     if (n3 < 0) n3 = rank + (m-1);
561     if (n5 < 0) n5 = rank - (m-1);
562     if ((n1 >= 0) && (n0 < 0)) n0 = rank-1;
563     if ((n1 >= 0) && (n2 < 0)) n2 = rank-2*m+1;
564     if ((n7 >= 0) && (n6 < 0)) n6 = rank+2*m-1;
565     if ((n7 >= 0) && (n8 < 0)) n8 = rank+1;
566   }
567 
568   ierr = PetscMalloc1(9,&dd->neighbors);CHKERRQ(ierr);
569 
570   dd->neighbors[0] = n0;
571   dd->neighbors[1] = n1;
572   dd->neighbors[2] = n2;
573   dd->neighbors[3] = n3;
574   dd->neighbors[4] = rank;
575   dd->neighbors[5] = n5;
576   dd->neighbors[6] = n6;
577   dd->neighbors[7] = n7;
578   dd->neighbors[8] = n8;
579 
580   if (stencil_type == DMDA_STENCIL_STAR) {
581     /* save corner processor numbers */
582     sn0 = n0; sn2 = n2; sn6 = n6; sn8 = n8;
583     n0  = n2 = n6 = n8 = -1;
584   }
585 
586   ierr = PetscMalloc1((Xe-Xs)*(Ye-Ys),&idx);CHKERRQ(ierr);
587 
588   nn = 0;
589   xbase = bases[rank];
590   for (i=1; i<=s_y; i++) {
591     if (n0 >= 0) { /* left below */
592       x_t = lx[n0 % m];
593       y_t = ly[(n0/m)];
594       s_t = bases[n0] + x_t*y_t - (s_y-i)*x_t - s_x;
595       for (j=0; j<s_x; j++) idx[nn++] = s_t++;
596     }
597 
598     if (n1 >= 0) { /* directly below */
599       x_t = x;
600       y_t = ly[(n1/m)];
601       s_t = bases[n1] + x_t*y_t - (s_y+1-i)*x_t;
602       for (j=0; j<x_t; j++) idx[nn++] = s_t++;
603     } else if (by == DM_BOUNDARY_MIRROR) {
604       for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(s_y - i + 1)  + j;
605     }
606 
607     if (n2 >= 0) { /* right below */
608       x_t = lx[n2 % m];
609       y_t = ly[(n2/m)];
610       s_t = bases[n2] + x_t*y_t - (s_y+1-i)*x_t;
611       for (j=0; j<s_x; j++) idx[nn++] = s_t++;
612     }
613   }
614 
615   for (i=0; i<y; i++) {
616     if (n3 >= 0) { /* directly left */
617       x_t = lx[n3 % m];
618       /* y_t = y; */
619       s_t = bases[n3] + (i+1)*x_t - s_x;
620       for (j=0; j<s_x; j++) idx[nn++] = s_t++;
621     } else if (bx == DM_BOUNDARY_MIRROR) {
622       for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*i + s_x - j;
623     }
624 
625     for (j=0; j<x; j++) idx[nn++] = xbase++; /* interior */
626 
627     if (n5 >= 0) { /* directly right */
628       x_t = lx[n5 % m];
629       /* y_t = y; */
630       s_t = bases[n5] + (i)*x_t;
631       for (j=0; j<s_x; j++) idx[nn++] = s_t++;
632     } else if (bx == DM_BOUNDARY_MIRROR) {
633       for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*(i + 1) - 2 - j;
634     }
635   }
636 
637   for (i=1; i<=s_y; i++) {
638     if (n6 >= 0) { /* left above */
639       x_t = lx[n6 % m];
640       /* y_t = ly[(n6/m)]; */
641       s_t = bases[n6] + (i)*x_t - s_x;
642       for (j=0; j<s_x; j++) idx[nn++] = s_t++;
643     }
644 
645     if (n7 >= 0) { /* directly above */
646       x_t = x;
647       /* y_t = ly[(n7/m)]; */
648       s_t = bases[n7] + (i-1)*x_t;
649       for (j=0; j<x_t; j++) idx[nn++] = s_t++;
650     } else if (by == DM_BOUNDARY_MIRROR) {
651       for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(y - i - 1)  + j;
652     }
653 
654     if (n8 >= 0) { /* right above */
655       x_t = lx[n8 % m];
656       /* y_t = ly[(n8/m)]; */
657       s_t = bases[n8] + (i-1)*x_t;
658       for (j=0; j<s_x; j++) idx[nn++] = s_t++;
659     }
660   }
661 
662   ierr = ISCreateBlock(comm,dof,nn,idx,PETSC_USE_POINTER,&from);CHKERRQ(ierr);
663   ierr = VecScatterCreate(global,from,local,to,&gtol);CHKERRQ(ierr);
664   ierr = PetscLogObjectParent((PetscObject)da,(PetscObject)gtol);CHKERRQ(ierr);
665   ierr = ISDestroy(&to);CHKERRQ(ierr);
666   ierr = ISDestroy(&from);CHKERRQ(ierr);
667 
668   if (stencil_type == DMDA_STENCIL_STAR) {
669     n0 = sn0; n2 = sn2; n6 = sn6; n8 = sn8;
670   }
671 
672   if (((stencil_type == DMDA_STENCIL_STAR)  || (bx && bx != DM_BOUNDARY_PERIODIC) || (by && by != DM_BOUNDARY_PERIODIC))) {
673     /*
674         Recompute the local to global mappings, this time keeping the
675       information about the cross corner processor numbers and any ghosted
676       but not periodic indices.
677     */
678     nn    = 0;
679     xbase = bases[rank];
680     for (i=1; i<=s_y; i++) {
681       if (n0 >= 0) { /* left below */
682         x_t = lx[n0 % m];
683         y_t = ly[(n0/m)];
684         s_t = bases[n0] + x_t*y_t - (s_y-i)*x_t - s_x;
685         for (j=0; j<s_x; j++) idx[nn++] = s_t++;
686       } else if (xs-Xs > 0 && ys-Ys > 0) {
687         for (j=0; j<s_x; j++) idx[nn++] = -1;
688       }
689       if (n1 >= 0) { /* directly below */
690         x_t = x;
691         y_t = ly[(n1/m)];
692         s_t = bases[n1] + x_t*y_t - (s_y+1-i)*x_t;
693         for (j=0; j<x_t; j++) idx[nn++] = s_t++;
694       } else if (ys-Ys > 0) {
695         if (by == DM_BOUNDARY_MIRROR) {
696           for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(s_y - i + 1)  + j;
697         } else {
698           for (j=0; j<x; j++) idx[nn++] = -1;
699         }
700       }
701       if (n2 >= 0) { /* right below */
702         x_t = lx[n2 % m];
703         y_t = ly[(n2/m)];
704         s_t = bases[n2] + x_t*y_t - (s_y+1-i)*x_t;
705         for (j=0; j<s_x; j++) idx[nn++] = s_t++;
706       } else if (Xe-xe> 0 && ys-Ys > 0) {
707         for (j=0; j<s_x; j++) idx[nn++] = -1;
708       }
709     }
710 
711     for (i=0; i<y; i++) {
712       if (n3 >= 0) { /* directly left */
713         x_t = lx[n3 % m];
714         /* y_t = y; */
715         s_t = bases[n3] + (i+1)*x_t - s_x;
716         for (j=0; j<s_x; j++) idx[nn++] = s_t++;
717       } else if (xs-Xs > 0) {
718         if (bx == DM_BOUNDARY_MIRROR) {
719           for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*i + s_x - j;
720         } else {
721           for (j=0; j<s_x; j++) idx[nn++] = -1;
722         }
723       }
724 
725       for (j=0; j<x; j++) idx[nn++] = xbase++; /* interior */
726 
727       if (n5 >= 0) { /* directly right */
728         x_t = lx[n5 % m];
729         /* y_t = y; */
730         s_t = bases[n5] + (i)*x_t;
731         for (j=0; j<s_x; j++) idx[nn++] = s_t++;
732       } else if (Xe-xe > 0) {
733         if (bx == DM_BOUNDARY_MIRROR) {
734           for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*(i + 1) - 2 - j;
735         } else {
736           for (j=0; j<s_x; j++) idx[nn++] = -1;
737         }
738       }
739     }
740 
741     for (i=1; i<=s_y; i++) {
742       if (n6 >= 0) { /* left above */
743         x_t = lx[n6 % m];
744         /* y_t = ly[(n6/m)]; */
745         s_t = bases[n6] + (i)*x_t - s_x;
746         for (j=0; j<s_x; j++) idx[nn++] = s_t++;
747       } else if (xs-Xs > 0 && Ye-ye > 0) {
748         for (j=0; j<s_x; j++) idx[nn++] = -1;
749       }
750       if (n7 >= 0) { /* directly above */
751         x_t = x;
752         /* y_t = ly[(n7/m)]; */
753         s_t = bases[n7] + (i-1)*x_t;
754         for (j=0; j<x_t; j++) idx[nn++] = s_t++;
755       } else if (Ye-ye > 0) {
756         if (by == DM_BOUNDARY_MIRROR) {
757           for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(y - i - 1)  + j;
758         } else {
759           for (j=0; j<x; j++) idx[nn++] = -1;
760         }
761       }
762       if (n8 >= 0) { /* right above */
763         x_t = lx[n8 % m];
764         /* y_t = ly[(n8/m)]; */
765         s_t = bases[n8] + (i-1)*x_t;
766         for (j=0; j<s_x; j++) idx[nn++] = s_t++;
767       } else if (Xe-xe > 0 && Ye-ye > 0) {
768         for (j=0; j<s_x; j++) idx[nn++] = -1;
769       }
770     }
771   }
772   /*
773      Set the local to global ordering in the global vector, this allows use
774      of VecSetValuesLocal().
775   */
776   ierr = ISLocalToGlobalMappingCreate(comm,dof,nn,idx,PETSC_OWN_POINTER,&da->ltogmap);CHKERRQ(ierr);
777   ierr = PetscLogObjectParent((PetscObject)da,(PetscObject)da->ltogmap);CHKERRQ(ierr);
778 
779   ierr  = PetscFree2(bases,ldims);CHKERRQ(ierr);
780   dd->m = m;  dd->n  = n;
781   /* note petsc expects xs/xe/Xs/Xe to be multiplied by #dofs in many places */
782   dd->xs = xs*dof; dd->xe = xe*dof; dd->ys = ys; dd->ye = ye; dd->zs = 0; dd->ze = 1;
783   dd->Xs = Xs*dof; dd->Xe = Xe*dof; dd->Ys = Ys; dd->Ye = Ye; dd->Zs = 0; dd->Ze = 1;
784 
785   ierr = VecDestroy(&local);CHKERRQ(ierr);
786   ierr = VecDestroy(&global);CHKERRQ(ierr);
787 
788   dd->gtol      = gtol;
789   dd->base      = base;
790   da->ops->view = DMView_DA_2d;
791   dd->ltol      = NULL;
792   dd->ao        = NULL;
793   PetscFunctionReturn(0);
794 }
795 
796 /*@C
797    DMDACreate2d -  Creates an object that will manage the communication of  two-dimensional
798    regular array data that is distributed across some processors.
799 
800    Collective on MPI_Comm
801 
802    Input Parameters:
803 +  comm - MPI communicator
804 .  bx,by - type of ghost nodes the array have.
805          Use one of DM_BOUNDARY_NONE, DM_BOUNDARY_GHOSTED, DM_BOUNDARY_PERIODIC.
806 .  stencil_type - stencil type.  Use either DMDA_STENCIL_BOX or DMDA_STENCIL_STAR.
807 .  M,N - global dimension in each direction of the array
808 .  m,n - corresponding number of processors in each dimension
809          (or PETSC_DECIDE to have calculated)
810 .  dof - number of degrees of freedom per node
811 .  s - stencil width
812 -  lx, ly - arrays containing the number of nodes in each cell along
813            the x and y coordinates, or NULL. If non-null, these
814            must be of length as m and n, and the corresponding
815            m and n cannot be PETSC_DECIDE. The sum of the lx[] entries
816            must be M, and the sum of the ly[] entries must be N.
817 
818    Output Parameter:
819 .  da - the resulting distributed array object
820 
821    Options Database Key:
822 +  -dm_view - Calls DMView() at the conclusion of DMDACreate2d()
823 .  -da_grid_x <nx> - number of grid points in x direction, if M < 0
824 .  -da_grid_y <ny> - number of grid points in y direction, if N < 0
825 .  -da_processors_x <nx> - number of processors in x direction
826 .  -da_processors_y <ny> - number of processors in y direction
827 .  -da_refine_x <rx> - refinement ratio in x direction
828 .  -da_refine_y <ry> - refinement ratio in y direction
829 -  -da_refine <n> - refine the DMDA n times before creating, if M or N < 0
830 
831 
832    Level: beginner
833 
834    Notes:
835    The stencil type DMDA_STENCIL_STAR with width 1 corresponds to the
836    standard 5-pt stencil, while DMDA_STENCIL_BOX with width 1 denotes
837    the standard 9-pt stencil.
838 
839    The array data itself is NOT stored in the DMDA, it is stored in Vec objects;
840    The appropriate vector objects can be obtained with calls to DMCreateGlobalVector()
841    and DMCreateLocalVector() and calls to VecDuplicate() if more are needed.
842 
843    You must call DMSetUp() after this call before using this DM.
844 
845    If you wish to use the options database to change values in the DMDA call DMSetFromOptions() after this call
846    but before DMSetUp().
847 
848 .keywords: distributed array, create, two-dimensional
849 
850 .seealso: DMDestroy(), DMView(), DMDACreate1d(), DMDACreate3d(), DMGlobalToLocalBegin(), DMDAGetRefinementFactor(),
851           DMGlobalToLocalEnd(), DMLocalToGlobalBegin(), DMLocalToLocalBegin(), DMLocalToLocalEnd(), DMDASetRefinementFactor(),
852           DMDAGetInfo(), DMCreateGlobalVector(), DMCreateLocalVector(), DMDACreateNaturalVector(), DMLoad(), DMDAGetOwnershipRanges()
853 
854 @*/
855 
856 PetscErrorCode  DMDACreate2d(MPI_Comm comm,DMBoundaryType bx,DMBoundaryType by,DMDAStencilType stencil_type,
857                           PetscInt M,PetscInt N,PetscInt m,PetscInt n,PetscInt dof,PetscInt s,const PetscInt lx[],const PetscInt ly[],DM *da)
858 {
859   PetscErrorCode ierr;
860 
861   PetscFunctionBegin;
862   ierr = DMDACreate(comm, da);CHKERRQ(ierr);
863   ierr = DMSetDimension(*da, 2);CHKERRQ(ierr);
864   ierr = DMDASetSizes(*da, M, N, 1);CHKERRQ(ierr);
865   ierr = DMDASetNumProcs(*da, m, n, PETSC_DECIDE);CHKERRQ(ierr);
866   ierr = DMDASetBoundaryType(*da, bx, by, DM_BOUNDARY_NONE);CHKERRQ(ierr);
867   ierr = DMDASetDof(*da, dof);CHKERRQ(ierr);
868   ierr = DMDASetStencilType(*da, stencil_type);CHKERRQ(ierr);
869   ierr = DMDASetStencilWidth(*da, s);CHKERRQ(ierr);
870   ierr = DMDASetOwnershipRanges(*da, lx, ly, NULL);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
873