xref: /petsc/src/dm/impls/stag/stag2d.c (revision 834855d6effb0d027771461c8e947ee1ce5a1e17)
1 /* Functions specific to the 2-dimensional implementation of DMStag */
2 #include <petsc/private/dmstagimpl.h> /*I  "petscdmstag.h"   I*/
3 
4 /*@
5   DMStagCreate2d - Create an object to manage data living on the elements, faces, and vertices of a parallelized regular 2D grid.
6 
7   Collective
8 
9   Input Parameters:
10 + comm         - MPI communicator
11 . bndx         - x boundary type, `DM_BOUNDARY_NONE`, `DM_BOUNDARY_PERIODIC`, or
12 `DM_BOUNDARY_GHOSTED`
13 . bndy         - y boundary type, `DM_BOUNDARY_NONE`, `DM_BOUNDARY_PERIODIC`, or `DM_BOUNDARY_GHOSTED`
14 . M            - global number of elements in x direction
15 . N            - global number of elements in y direction
16 . m            - number of ranks in the x direction (may be `PETSC_DECIDE`)
17 . n            - number of ranks in the y direction (may be `PETSC_DECIDE`)
18 . dof0         - number of degrees of freedom per vertex/0-cell
19 . dof1         - number of degrees of freedom per face/1-cell
20 . dof2         - number of degrees of freedom per element/2-cell
21 . stencilType  - ghost/halo region type: `DMSTAG_STENCIL_NONE`, `DMSTAG_STENCIL_BOX`, or `DMSTAG_STENCIL_STAR`
22 . stencilWidth - width, in elements, of halo/ghost region
23 . lx           - array of local x element counts, of length equal to `m`, summing to `M`, or `NULL`
24 - ly           - array of local y element counts, of length equal to `n`, summing to `N`, or `NULL`
25 
26   Output Parameter:
27 . dm - the new `DMSTAG` object
28 
29   Options Database Keys:
30 + -dm_view                                      - calls `DMViewFromOptions()` at the conclusion of `DMSetUp()`
31 . -stag_grid_x <nx>                             - number of elements in the x direction
32 . -stag_grid_y <ny>                             - number of elements in the y direction
33 . -stag_ranks_x <rx>                            - number of ranks in the x direction
34 . -stag_ranks_y <ry>                            - number of ranks in the y direction
35 . -stag_ghost_stencil_width                     - width of ghost region, in elements
36 . -stag_boundary_type_x <none,ghosted,periodic> - `DMBoundaryType` value
37 - -stag_boundary_type_y <none,ghosted,periodic> - `DMBoundaryType` value
38 
39   Level: beginner
40 
41   Notes:
42   You must call `DMSetUp()` after this call, before using the `DM`.
43   If you wish to use the options database (see the keys above) to change values in the `DMSTAG`, you must call
44   `DMSetFromOptions()` after this function but before `DMSetUp()`.
45 
46 .seealso: [](ch_stag), `DMSTAG`, `DMStagCreate1d()`, `DMStagCreate3d()`, `DMDestroy()`, `DMView()`, `DMCreateGlobalVector()`, `DMCreateLocalVector()`, `DMLocalToGlobalBegin()`, `DMDACreate2d()`
47 @*/
DMStagCreate2d(MPI_Comm comm,DMBoundaryType bndx,DMBoundaryType bndy,PetscInt M,PetscInt N,PetscInt m,PetscInt n,PetscInt dof0,PetscInt dof1,PetscInt dof2,DMStagStencilType stencilType,PetscInt stencilWidth,const PetscInt lx[],const PetscInt ly[],DM * dm)48 PetscErrorCode DMStagCreate2d(MPI_Comm comm, DMBoundaryType bndx, DMBoundaryType bndy, PetscInt M, PetscInt N, PetscInt m, PetscInt n, PetscInt dof0, PetscInt dof1, PetscInt dof2, DMStagStencilType stencilType, PetscInt stencilWidth, const PetscInt lx[], const PetscInt ly[], DM *dm)
49 {
50   PetscFunctionBegin;
51   PetscCall(DMCreate(comm, dm));
52   PetscCall(DMSetDimension(*dm, 2));
53   PetscCall(DMStagInitialize(bndx, bndy, DM_BOUNDARY_NONE, M, N, 0, m, n, 0, dof0, dof1, dof2, 0, stencilType, stencilWidth, lx, ly, NULL, *dm));
54   PetscFunctionReturn(PETSC_SUCCESS);
55 }
56 
DMStagRestrictSimple_2d(DM dmf,Vec xf_local,DM dmc,Vec xc_local)57 PETSC_INTERN PetscErrorCode DMStagRestrictSimple_2d(DM dmf, Vec xf_local, DM dmc, Vec xc_local)
58 {
59   PetscInt             Mf, Nf, Mc, Nc, factorx, factory, dof[3];
60   PetscInt             xc, yc, mc, nc, nExtraxc, nExtrayc, i, j, d;
61   PetscInt             idownleftf, ileftf, idownf, ielemf, idownleftc, ileftc, idownc, ielemc;
62   const PetscScalar ***arrf;
63   PetscScalar       ***arrc;
64 
65   PetscFunctionBegin;
66   PetscCall(DMStagGetGlobalSizes(dmf, &Mf, &Nf, NULL));
67   PetscCall(DMStagGetGlobalSizes(dmc, &Mc, &Nc, NULL));
68   factorx = Mf / Mc;
69   factory = Nf / Nc;
70   PetscCall(DMStagGetDOF(dmc, &dof[0], &dof[1], &dof[2], NULL));
71 
72   PetscCall(DMStagGetCorners(dmc, &xc, &yc, NULL, &mc, &nc, NULL, &nExtraxc, &nExtrayc, NULL));
73   PetscCall(VecZeroEntries(xc_local));
74   PetscCall(DMStagVecGetArray(dmf, xf_local, &arrf));
75   PetscCall(DMStagVecGetArray(dmc, xc_local, &arrc));
76   PetscCall(DMStagGetLocationSlot(dmf, DMSTAG_DOWN_LEFT, 0, &idownleftf));
77   PetscCall(DMStagGetLocationSlot(dmf, DMSTAG_LEFT, 0, &ileftf));
78   PetscCall(DMStagGetLocationSlot(dmf, DMSTAG_DOWN, 0, &idownf));
79   PetscCall(DMStagGetLocationSlot(dmf, DMSTAG_ELEMENT, 0, &ielemf));
80   PetscCall(DMStagGetLocationSlot(dmc, DMSTAG_DOWN_LEFT, 0, &idownleftc));
81   PetscCall(DMStagGetLocationSlot(dmc, DMSTAG_LEFT, 0, &ileftc));
82   PetscCall(DMStagGetLocationSlot(dmc, DMSTAG_DOWN, 0, &idownc));
83   PetscCall(DMStagGetLocationSlot(dmc, DMSTAG_ELEMENT, 0, &ielemc));
84 
85   for (d = 0; d < dof[0]; ++d)
86     for (j = yc; j < yc + nc + nExtrayc; ++j)
87       for (i = xc; i < xc + mc + nExtraxc; ++i) {
88         const PetscInt ii = factorx * i, jj = factory * j;
89 
90         arrc[j][i][idownleftc + d] = arrf[jj][ii][idownleftf + d];
91       }
92 
93   for (d = 0; d < dof[1]; ++d)
94     for (j = yc; j < yc + nc; ++j)
95       for (i = xc; i < xc + mc + nExtraxc; ++i) {
96         const PetscInt ii = factorx * i, jj = factory * j + factory / 2;
97 
98         if (factory % 2 == 0) arrc[j][i][ileftc + d] = 0.5 * (arrf[jj - 1][ii][ileftf + d] + arrf[jj][ii][ileftf + d]);
99         else arrc[j][i][ileftc + d] = arrf[jj][ii][ileftf + d];
100       }
101 
102   for (d = 0; d < dof[1]; ++d)
103     for (j = yc; j < yc + nc + nExtrayc; ++j)
104       for (i = xc; i < xc + mc; ++i) {
105         const PetscInt ii = factorx * i + factorx / 2, jj = factory * j;
106 
107         if (factorx % 2 == 0) arrc[j][i][idownc + d] = 0.5 * (arrf[jj][ii - 1][idownf + d] + arrf[jj][ii][idownf + d]);
108         else arrc[j][i][idownc + d] = arrf[jj][ii][idownf + d];
109       }
110 
111   for (d = 0; d < dof[2]; ++d)
112     for (j = yc; j < yc + nc; ++j)
113       for (i = xc; i < xc + mc; ++i) {
114         const PetscInt ii = factorx * i + factorx / 2, jj = factory * j + factory / 2;
115 
116         if (factorx % 2 == 0 && factory % 2 == 0) arrc[j][i][ielemc + d] = 0.25 * (arrf[jj - 1][ii - 1][ielemf + d] + arrf[jj][ii - 1][ielemf + d] + arrf[jj - 1][ii][ielemf + d] + arrf[jj][ii][ielemf + d]);
117         else if (factorx % 2 == 0) arrc[j][i][ielemc + d] = 0.5 * (arrf[jj - 1][ii - 1][ielemf + d] + arrf[jj][ii - 1][ielemf + d]);
118         else if (factory % 2 == 0) arrc[j][i][ielemc + d] = 0.5 * (arrf[jj - 1][ii - 1][ielemf + d] + arrf[jj - 1][ii][ielemf + d]);
119         else arrc[j][i][ielemc + d] = arrf[jj][ii][ielemf + d];
120       }
121 
122   PetscCall(DMStagVecRestoreArray(dmf, xf_local, &arrf));
123   PetscCall(DMStagVecRestoreArray(dmc, xc_local, &arrc));
124   PetscFunctionReturn(PETSC_SUCCESS);
125 }
126 
DMStagSetUniformCoordinatesExplicit_2d(DM dm,PetscReal xmin,PetscReal xmax,PetscReal ymin,PetscReal ymax)127 PETSC_INTERN PetscErrorCode DMStagSetUniformCoordinatesExplicit_2d(DM dm, PetscReal xmin, PetscReal xmax, PetscReal ymin, PetscReal ymax)
128 {
129   DM_Stag       *stagCoord;
130   DM             dmCoord;
131   Vec            coordLocal;
132   PetscReal      h[2], min[2];
133   PetscScalar ***arr;
134   PetscInt       ind[2], start_ghost[2], n_ghost[2], s, c;
135   PetscInt       idownleft, idown, ileft, ielement;
136 
137   PetscFunctionBegin;
138   PetscCall(DMGetCoordinateDM(dm, &dmCoord));
139   stagCoord = (DM_Stag *)dmCoord->data;
140   for (s = 0; s < 3; ++s) {
141     PetscCheck(stagCoord->dof[s] == 0 || stagCoord->dof[s] == 2, PetscObjectComm((PetscObject)dm), PETSC_ERR_PLIB, "Coordinate DM in 2 dimensions must have 0 or 2 dof on each stratum, but stratum %" PetscInt_FMT " has %" PetscInt_FMT " dof", s,
142                stagCoord->dof[s]);
143   }
144   PetscCall(DMCreateLocalVector(dmCoord, &coordLocal));
145 
146   PetscCall(DMStagVecGetArray(dmCoord, coordLocal, &arr));
147   if (stagCoord->dof[0]) PetscCall(DMStagGetLocationSlot(dmCoord, DMSTAG_DOWN_LEFT, 0, &idownleft));
148   if (stagCoord->dof[1]) {
149     PetscCall(DMStagGetLocationSlot(dmCoord, DMSTAG_DOWN, 0, &idown));
150     PetscCall(DMStagGetLocationSlot(dmCoord, DMSTAG_LEFT, 0, &ileft));
151   }
152   if (stagCoord->dof[2]) PetscCall(DMStagGetLocationSlot(dmCoord, DMSTAG_ELEMENT, 0, &ielement));
153   PetscCall(DMStagGetGhostCorners(dmCoord, &start_ghost[0], &start_ghost[1], NULL, &n_ghost[0], &n_ghost[1], NULL));
154 
155   min[0] = xmin;
156   min[1] = ymin;
157   h[0]   = (xmax - xmin) / stagCoord->N[0];
158   h[1]   = (ymax - ymin) / stagCoord->N[1];
159 
160   for (ind[1] = start_ghost[1]; ind[1] < start_ghost[1] + n_ghost[1]; ++ind[1]) {
161     for (ind[0] = start_ghost[0]; ind[0] < start_ghost[0] + n_ghost[0]; ++ind[0]) {
162       if (stagCoord->dof[0]) {
163         const PetscReal offs[2] = {0.0, 0.0};
164         for (c = 0; c < 2; ++c) arr[ind[1]][ind[0]][idownleft + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
165       }
166       if (stagCoord->dof[1]) {
167         const PetscReal offs[2] = {0.5, 0.0};
168         for (c = 0; c < 2; ++c) arr[ind[1]][ind[0]][idown + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
169       }
170       if (stagCoord->dof[1]) {
171         const PetscReal offs[2] = {0.0, 0.5};
172         for (c = 0; c < 2; ++c) arr[ind[1]][ind[0]][ileft + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
173       }
174       if (stagCoord->dof[2]) {
175         const PetscReal offs[2] = {0.5, 0.5};
176         for (c = 0; c < 2; ++c) arr[ind[1]][ind[0]][ielement + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
177       }
178     }
179   }
180   PetscCall(DMStagVecRestoreArray(dmCoord, coordLocal, &arr));
181   PetscCall(DMSetCoordinatesLocal(dm, coordLocal));
182   PetscCall(VecDestroy(&coordLocal));
183   PetscFunctionReturn(PETSC_SUCCESS);
184 }
185 
186 /* Helper functions used in DMSetUp_Stag() */
187 static PetscErrorCode DMStagSetUpBuildRankGrid_2d(DM);
188 static PetscErrorCode DMStagSetUpBuildNeighbors_2d(DM);
189 static PetscErrorCode DMStagSetUpBuildGlobalOffsets_2d(DM, PetscInt **);
190 static PetscErrorCode DMStagComputeLocationOffsets_2d(DM);
191 
DMSetUp_Stag_2d(DM dm)192 PETSC_INTERN PetscErrorCode DMSetUp_Stag_2d(DM dm)
193 {
194   DM_Stag *const stag = (DM_Stag *)dm->data;
195   PetscMPIInt    size, rank;
196   PetscInt       i, j, d, entriesPerElementRowGhost, entriesPerCorner, entriesPerFace, entriesPerElementRow;
197   MPI_Comm       comm;
198   PetscInt      *globalOffsets;
199   PetscBool      star, dummyStart[2], dummyEnd[2];
200   const PetscInt dim = 2;
201 
202   PetscFunctionBegin;
203   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
204   PetscCallMPI(MPI_Comm_size(comm, &size));
205   PetscCallMPI(MPI_Comm_rank(comm, &rank));
206 
207   /* Rank grid sizes (populates stag->nRanks) */
208   PetscCall(DMStagSetUpBuildRankGrid_2d(dm));
209 
210   /* Determine location of rank in grid (these get extra boundary points on the last element)
211      Order is x-fast, as usual */
212   stag->rank[0] = rank % stag->nRanks[0];
213   stag->rank[1] = rank / stag->nRanks[0];
214   for (i = 0; i < dim; ++i) {
215     stag->firstRank[i] = PetscNot(stag->rank[i]);
216     stag->lastRank[i]  = (PetscBool)(stag->rank[i] == stag->nRanks[i] - 1);
217   }
218 
219   /* Determine Locally owned region
220 
221    Divide equally, giving lower ranks in each dimension and extra element if needbe.
222 
223    Note that this uses O(P) storage. If this ever becomes an issue, this could
224    be refactored to not keep this data around.  */
225   for (i = 0; i < dim; ++i) {
226     if (!stag->l[i]) {
227       const PetscInt Ni = stag->N[i], nRanksi = stag->nRanks[i];
228       PetscCall(PetscMalloc1(stag->nRanks[i], &stag->l[i]));
229       for (j = 0; j < stag->nRanks[i]; ++j) stag->l[i][j] = Ni / nRanksi + ((Ni % nRanksi) > j);
230     }
231   }
232 
233   /* Retrieve local size in stag->n */
234   for (i = 0; i < dim; ++i) stag->n[i] = stag->l[i][stag->rank[i]];
235   if (PetscDefined(USE_DEBUG)) {
236     for (i = 0; i < dim; ++i) {
237       PetscInt Ncheck, j;
238       Ncheck = 0;
239       for (j = 0; j < stag->nRanks[i]; ++j) Ncheck += stag->l[i][j];
240       PetscCheck(Ncheck == stag->N[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local sizes in dimension %" PetscInt_FMT " don't add up. %" PetscInt_FMT " != %" PetscInt_FMT, i, Ncheck, stag->N[i]);
241     }
242   }
243 
244   /* Compute starting elements */
245   for (i = 0; i < dim; ++i) {
246     stag->start[i] = 0;
247     for (j = 0; j < stag->rank[i]; ++j) stag->start[i] += stag->l[i][j];
248   }
249 
250   /* Determine ranks of neighbors, using DMDA's convention
251 
252      n6 n7 n8
253      n3    n5
254      n0 n1 n2                                               */
255   PetscCall(DMStagSetUpBuildNeighbors_2d(dm));
256 
257   /* Determine whether the ghost region includes dummies or not. This is currently
258        equivalent to having a non-periodic boundary. If not, then
259        ghostOffset{Start,End}[d] elements correspond to elements on the neighbor.
260        If true, then
261        - at the start, there are ghostOffsetStart[d] ghost elements
262        - at the end, there is a layer of extra "physical" points inside a layer of
263          ghostOffsetEnd[d] ghost elements
264        Note that this computation should be updated if any boundary types besides
265        NONE, GHOSTED, and PERIODIC are supported.  */
266   for (d = 0; d < 2; ++d) dummyStart[d] = (PetscBool)(stag->firstRank[d] && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);
267   for (d = 0; d < 2; ++d) dummyEnd[d] = (PetscBool)(stag->lastRank[d] && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);
268 
269   /* Define useful sizes */
270   stag->entriesPerElement = stag->dof[0] + 2 * stag->dof[1] + stag->dof[2];
271   entriesPerFace          = stag->dof[0] + stag->dof[1];
272   entriesPerCorner        = stag->dof[0];
273   entriesPerElementRow    = stag->n[0] * stag->entriesPerElement + (dummyEnd[0] ? entriesPerFace : 0);
274   stag->entries           = stag->n[1] * entriesPerElementRow + (dummyEnd[1] ? stag->n[0] * entriesPerFace : 0) + (dummyEnd[0] && dummyEnd[1] ? entriesPerCorner : 0);
275 
276   /* Compute offsets for each rank into global vectors
277      This again requires O(P) storage, which could be replaced with some global
278      communication.  */
279   PetscCall(DMStagSetUpBuildGlobalOffsets_2d(dm, &globalOffsets));
280 
281   for (d = 0; d < dim; ++d)
282     PetscCheck(stag->boundaryType[d] == DM_BOUNDARY_NONE || stag->boundaryType[d] == DM_BOUNDARY_PERIODIC || stag->boundaryType[d] == DM_BOUNDARY_GHOSTED, PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Unsupported boundary type");
283 
284   /* Define ghosted/local sizes */
285   PetscCheck(stag->stencilType == DMSTAG_STENCIL_NONE || !(stag->n[0] < stag->stencilWidth || stag->n[1] < stag->stencilWidth), PETSC_COMM_SELF, PETSC_ERR_SUP, "DMStag 2d setup does not support local sizes (%" PetscInt_FMT " x %" PetscInt_FMT ") smaller than the elementwise stencil width (%" PetscInt_FMT ")",
286              stag->n[0], stag->n[1], stag->stencilWidth);
287   for (d = 0; d < dim; ++d) {
288     switch (stag->boundaryType[d]) {
289     case DM_BOUNDARY_NONE:
290       /* Note: for a elements-only DMStag, the extra elements on the faces aren't necessary but we include them anyway */
291       switch (stag->stencilType) {
292       case DMSTAG_STENCIL_NONE: /* only the extra one on the right/top faces */
293         stag->nGhost[d]     = stag->n[d];
294         stag->startGhost[d] = stag->start[d];
295         if (stag->lastRank[d]) stag->nGhost[d] += 1;
296         break;
297       case DMSTAG_STENCIL_STAR: /* allocate the corners but don't use them */
298       case DMSTAG_STENCIL_BOX:
299         stag->nGhost[d]     = stag->n[d];
300         stag->startGhost[d] = stag->start[d];
301         if (!stag->firstRank[d]) {
302           stag->nGhost[d] += stag->stencilWidth; /* add interior ghost elements */
303           stag->startGhost[d] -= stag->stencilWidth;
304         }
305         if (!stag->lastRank[d]) {
306           stag->nGhost[d] += stag->stencilWidth; /* add interior ghost elements */
307         } else {
308           stag->nGhost[d] += 1; /* one element on the boundary to complete blocking */
309         }
310         break;
311       default:
312         SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Unrecognized ghost stencil type %d", stag->stencilType);
313       }
314       break;
315     case DM_BOUNDARY_GHOSTED:
316       switch (stag->stencilType) {
317       case DMSTAG_STENCIL_NONE:
318         stag->startGhost[d] = stag->start[d];
319         stag->nGhost[d]     = stag->n[d] + (stag->lastRank[d] ? 1 : 0);
320         break;
321       case DMSTAG_STENCIL_STAR:
322       case DMSTAG_STENCIL_BOX:
323         stag->startGhost[d] = stag->start[d] - stag->stencilWidth; /* This value may be negative */
324         stag->nGhost[d]     = stag->n[d] + 2 * stag->stencilWidth + (stag->lastRank[d] && stag->stencilWidth == 0 ? 1 : 0);
325         break;
326       default:
327         SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Unrecognized ghost stencil type %d", stag->stencilType);
328       }
329       break;
330     case DM_BOUNDARY_PERIODIC:
331       switch (stag->stencilType) {
332       case DMSTAG_STENCIL_NONE: /* only the extra one on the right/top faces */
333         stag->nGhost[d]     = stag->n[d];
334         stag->startGhost[d] = stag->start[d];
335         break;
336       case DMSTAG_STENCIL_STAR:
337       case DMSTAG_STENCIL_BOX:
338         stag->nGhost[d]     = stag->n[d] + 2 * stag->stencilWidth;
339         stag->startGhost[d] = stag->start[d] - stag->stencilWidth;
340         break;
341       default:
342         SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Unrecognized ghost stencil type %d", stag->stencilType);
343       }
344       break;
345     default:
346       SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Unsupported boundary type in dimension %" PetscInt_FMT, d);
347     }
348   }
349   stag->entriesGhost        = stag->nGhost[0] * stag->nGhost[1] * stag->entriesPerElement;
350   entriesPerElementRowGhost = stag->nGhost[0] * stag->entriesPerElement;
351 
352   /* Create global-->local VecScatter and local->global ISLocalToGlobalMapping
353 
354      We iterate over all local points twice. First, we iterate over each neighbor, populating
355      1. idxLocal[] : the subset of points, in local numbering ("S" from 0 on all points including ghosts), which correspond to global points. That is, the set of all non-dummy points in the ghosted representation
356      2. idxGlobal[]: the corresponding global points, in global numbering (Nested "S"s - ranks then non-ghost points in each rank)
357 
358      Next, we iterate over all points in the local ordering, populating
359      3. idxGlobalAll[] : entry i is the global point corresponding to local point i, or -1 if local point i is a dummy.
360 
361      Note further here that the local/ghosted vectors:
362      - Are always an integral number of elements-worth of points, in all directions.
363      - Contain three flavors of points:
364      1. Points which "live here" in the global representation
365      2. Ghost points which correspond to points on other ranks in the global representation
366      3. Ghost points, which we call "dummy points," which do not correspond to any point in the global representation
367 
368      Dummy ghost points arise in at least three ways:
369      1. As padding for the right, top, and front physical boundaries, to complete partial elements
370      2. As unused space in the "corners" on interior ranks when using a star stencil
371      3. As additional work space on all physical boundaries, when DM_BOUNDARY_GHOSTED is used
372 
373      Note that, because of the boundary dummies,
374      with a stencil width of zero, on 1 rank, local and global vectors
375      are still different!
376 
377      We assume that the size on each rank is greater than or equal to the
378      stencil width.
379      */
380 
381   /* Check stencil type */
382   PetscCheck(stag->stencilType == DMSTAG_STENCIL_NONE || stag->stencilType == DMSTAG_STENCIL_BOX || stag->stencilType == DMSTAG_STENCIL_STAR, PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Unsupported stencil type %s", DMStagStencilTypes[stag->stencilType]);
383   star = (PetscBool)(stag->stencilType == DMSTAG_STENCIL_STAR || stag->stencilType == DMSTAG_STENCIL_NONE);
384 
385   {
386     PetscInt *idxLocal, *idxGlobal, *idxGlobalAll;
387     PetscInt  count, countAll, entriesToTransferTotal, i, j, d, ghostOffsetStart[2], ghostOffsetEnd[2];
388     IS        isLocal, isGlobal;
389     PetscInt  jghost, ighost;
390     PetscInt  nNeighbors[9][2];
391     PetscBool nextToDummyEnd[2];
392 
393     /* Compute numbers of elements on each neighbor */
394     for (i = 0; i < 9; ++i) {
395       const PetscInt neighborRank = stag->neighbors[i];
396       if (neighborRank >= 0) { /* note we copy the values for our own rank (neighbor 4) */
397         nNeighbors[i][0] = stag->l[0][neighborRank % stag->nRanks[0]];
398         nNeighbors[i][1] = stag->l[1][neighborRank / stag->nRanks[0]];
399       } else {
400         nNeighbors[i][0] = 0;
401         nNeighbors[i][1] = 0;
402       }
403     }
404 
405     /* These offsets should always be non-negative, and describe how many
406        ghost elements exist at each boundary. These are not always equal to the stencil width,
407        because we may have different numbers of ghost elements at the boundaries. In particular,
408        we always have at least one ghost (dummy) element at the right/top/front. */
409     for (d = 0; d < 2; ++d) ghostOffsetStart[d] = stag->start[d] - stag->startGhost[d];
410     for (d = 0; d < 2; ++d) ghostOffsetEnd[d] = stag->startGhost[d] + stag->nGhost[d] - (stag->start[d] + stag->n[d]);
411 
412     /* Compute whether the next rank has an extra point (only used in x direction) */
413     for (d = 0; d < 2; ++d) nextToDummyEnd[d] = (PetscBool)(stag->boundaryType[d] != DM_BOUNDARY_PERIODIC && stag->rank[d] == stag->nRanks[d] - 2);
414 
415     /* Compute the number of local entries which correspond to any global entry */
416     {
417       PetscInt nNonDummyGhost[2];
418       for (d = 0; d < 2; ++d) nNonDummyGhost[d] = stag->nGhost[d] - (dummyStart[d] ? ghostOffsetStart[d] : 0) - (dummyEnd[d] ? ghostOffsetEnd[d] : 0);
419       if (star) {
420         entriesToTransferTotal = (nNonDummyGhost[0] * stag->n[1] + stag->n[0] * nNonDummyGhost[1] - stag->n[0] * stag->n[1]) * stag->entriesPerElement + (dummyEnd[0] ? nNonDummyGhost[1] * entriesPerFace : 0) + (dummyEnd[1] ? nNonDummyGhost[0] * entriesPerFace : 0) + (dummyEnd[0] && dummyEnd[1] ? entriesPerCorner : 0);
421       } else {
422         entriesToTransferTotal = nNonDummyGhost[0] * nNonDummyGhost[1] * stag->entriesPerElement + (dummyEnd[0] ? nNonDummyGhost[1] * entriesPerFace : 0) + (dummyEnd[1] ? nNonDummyGhost[0] * entriesPerFace : 0) + (dummyEnd[0] && dummyEnd[1] ? entriesPerCorner : 0);
423       }
424     }
425 
426     /* Allocate arrays to populate */
427     PetscCall(PetscMalloc1(entriesToTransferTotal, &idxLocal));
428     PetscCall(PetscMalloc1(entriesToTransferTotal, &idxGlobal));
429 
430     /* Counts into idxLocal/idxGlobal */
431     count = 0;
432 
433     /* Here and below, we work with (i,j) describing element numbers within a neighboring rank's global ordering,
434        to be offset by that rank's global offset,
435        and (ighost,jghost) referring to element numbers within this ranks local (ghosted) ordering */
436 
437     /* Neighbor 0 (down left) */
438     if (!star && !dummyStart[0] && !dummyStart[1]) {
439       const PetscInt        neighbor                     = 0;
440       const PetscInt        globalOffset                 = globalOffsets[stag->neighbors[neighbor]];
441       const PetscInt *const nNeighbor                    = nNeighbors[neighbor];
442       const PetscInt        entriesPerElementRowNeighbor = stag->entriesPerElement * nNeighbor[0];
443       for (jghost = 0; jghost < ghostOffsetStart[1]; ++jghost) {
444         const PetscInt j = nNeighbor[1] - ghostOffsetStart[1] + jghost;
445         for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
446           const PetscInt i = nNeighbor[0] - ghostOffsetStart[0] + ighost;
447           for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
448             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
449             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
450           }
451         }
452       }
453     }
454 
455     /* Neighbor 1 (down) */
456     if (!dummyStart[1]) {
457       /* We may be a ghosted boundary in x, in which case the neighbor is also */
458       const PetscInt        neighbor                     = 1;
459       const PetscInt        globalOffset                 = globalOffsets[stag->neighbors[neighbor]];
460       const PetscInt *const nNeighbor                    = nNeighbors[neighbor];
461       const PetscInt        entriesPerElementRowNeighbor = entriesPerElementRow; /* same as here */
462       for (jghost = 0; jghost < ghostOffsetStart[1]; ++jghost) {
463         const PetscInt j = nNeighbor[1] - ghostOffsetStart[1] + jghost;
464         for (ighost = ghostOffsetStart[0]; ighost < stag->nGhost[0] - ghostOffsetEnd[0]; ++ighost) {
465           const PetscInt i = ighost - ghostOffsetStart[0];
466           for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
467             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
468             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
469           }
470         }
471         if (dummyEnd[0]) {
472           const PetscInt ighost = stag->nGhost[0] - ghostOffsetEnd[0];
473           const PetscInt i      = stag->n[0];
474           for (d = 0; d < stag->dof[0]; ++d, ++count) { /* Vertex */
475             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
476             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
477           }
478           for (d = 0; d < stag->dof[1]; ++d, ++count) { /* Face */
479             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + stag->dof[0] + d;
480             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + stag->dof[0] + stag->dof[1] + d;
481           }
482         }
483       }
484     }
485 
486     /* Neighbor 2 (down right) */
487     if (!star && !dummyEnd[0] && !dummyStart[1]) {
488       const PetscInt        neighbor                     = 2;
489       const PetscInt        globalOffset                 = globalOffsets[stag->neighbors[neighbor]];
490       const PetscInt *const nNeighbor                    = nNeighbors[neighbor];
491       const PetscInt        entriesPerElementRowNeighbor = nNeighbor[0] * stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerFace : 0);
492       for (jghost = 0; jghost < ghostOffsetStart[1]; ++jghost) {
493         const PetscInt j = nNeighbor[1] - ghostOffsetStart[1] + jghost;
494         for (i = 0; i < ghostOffsetEnd[0]; ++i) {
495           const PetscInt ighost = stag->nGhost[0] - ghostOffsetEnd[0] + i;
496           for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
497             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
498             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
499           }
500         }
501       }
502     }
503 
504     /* Neighbor 3 (left) */
505     if (!dummyStart[0]) {
506       /* Our neighbor is never a ghosted boundary in x, but we may be
507          Here, we may be a ghosted boundary in y and thus so will our neighbor be */
508       const PetscInt        neighbor                     = 3;
509       const PetscInt        globalOffset                 = globalOffsets[stag->neighbors[neighbor]];
510       const PetscInt *const nNeighbor                    = nNeighbors[neighbor];
511       const PetscInt        entriesPerElementRowNeighbor = nNeighbor[0] * stag->entriesPerElement;
512       for (jghost = ghostOffsetStart[1]; jghost < stag->nGhost[1] - ghostOffsetEnd[1]; ++jghost) {
513         const PetscInt j = jghost - ghostOffsetStart[1];
514         for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
515           const PetscInt i = nNeighbor[0] - ghostOffsetStart[0] + ighost;
516           for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
517             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
518             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
519           }
520         }
521       }
522       if (dummyEnd[1]) {
523         const PetscInt jghost = stag->nGhost[1] - ghostOffsetEnd[1];
524         const PetscInt j      = stag->n[1];
525         for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
526           const PetscInt i = nNeighbor[0] - ghostOffsetStart[0] + ighost;
527           for (d = 0; d < entriesPerFace; ++d, ++count) {                                                /* only vertices and horizontal face (which are the first dof) */
528             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * entriesPerFace + d; /* i moves by face here */
529             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
530           }
531         }
532       }
533     }
534 
535     /* Interior/Resident-here-in-global elements ("Neighbor 4" - same rank)
536        *including* entries from boundary dummy elements */
537     {
538       const PetscInt neighbor     = 4;
539       const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
540       for (j = 0; j < stag->n[1]; ++j) {
541         const PetscInt jghost = j + ghostOffsetStart[1];
542         for (i = 0; i < stag->n[0]; ++i) {
543           const PetscInt ighost = i + ghostOffsetStart[0];
544           for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
545             idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * stag->entriesPerElement + d;
546             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
547           }
548         }
549         if (dummyEnd[0]) {
550           const PetscInt ighost = i + ghostOffsetStart[0];
551           i                     = stag->n[0];
552           for (d = 0; d < stag->dof[0]; ++d, ++count) { /* vertex first */
553             idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * stag->entriesPerElement + d;
554             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
555           }
556           for (d = 0; d < stag->dof[1]; ++d, ++count) { /* then left edge (skipping bottom face) */
557             idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * stag->entriesPerElement + stag->dof[0] + d;
558             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + stag->dof[0] + stag->dof[1] + d;
559           }
560         }
561       }
562       if (dummyEnd[1]) {
563         const PetscInt jghost = j + ghostOffsetStart[1];
564         j                     = stag->n[1];
565         for (i = 0; i < stag->n[0]; ++i) {
566           const PetscInt ighost = i + ghostOffsetStart[0];
567           for (d = 0; d < entriesPerFace; ++d, ++count) {                                        /* vertex and bottom face (which are the first entries) */
568             idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * entriesPerFace + d; /* note i increment by entriesPerFace */
569             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
570           }
571         }
572         if (dummyEnd[0]) {
573           const PetscInt ighost = i + ghostOffsetStart[0];
574           i                     = stag->n[0];
575           for (d = 0; d < entriesPerCorner; ++d, ++count) {                                      /* vertex only */
576             idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * entriesPerFace + d; /* note i increment by entriesPerFace */
577             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
578           }
579         }
580       }
581     }
582 
583     /* Neighbor 5 (right) */
584     if (!dummyEnd[0]) {
585       /* We can never be right boundary, but we may be a top boundary, along with the right neighbor */
586       const PetscInt        neighbor                     = 5;
587       const PetscInt        globalOffset                 = globalOffsets[stag->neighbors[neighbor]];
588       const PetscInt *const nNeighbor                    = nNeighbors[neighbor];
589       const PetscInt        entriesPerElementRowNeighbor = nNeighbor[0] * stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerFace : 0);
590       for (jghost = ghostOffsetStart[1]; jghost < stag->nGhost[1] - ghostOffsetEnd[1]; ++jghost) {
591         const PetscInt j = jghost - ghostOffsetStart[1];
592         for (i = 0; i < ghostOffsetEnd[0]; ++i) {
593           const PetscInt ighost = stag->nGhost[0] - ghostOffsetEnd[0] + i;
594           for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
595             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
596             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
597           }
598         }
599       }
600       if (dummyEnd[1]) {
601         const PetscInt jghost = stag->nGhost[1] - ghostOffsetEnd[1];
602         const PetscInt j      = nNeighbor[1];
603         for (i = 0; i < ghostOffsetEnd[0]; ++i) {
604           const PetscInt ighost = stag->nGhost[0] - ghostOffsetEnd[0] + i;
605           for (d = 0; d < entriesPerFace; ++d, ++count) {                                                /* only vertices and horizontal face (which are the first dof) */
606             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * entriesPerFace + d; /* Note i increment by entriesPerFace */
607             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
608           }
609         }
610       }
611     }
612 
613     /* Neighbor 6 (up left) */
614     if (!star && !dummyStart[0] && !dummyEnd[1]) {
615       /* We can never be a top boundary, but our neighbor may be
616        We may be a right boundary, but our neighbor cannot be */
617       const PetscInt        neighbor                     = 6;
618       const PetscInt        globalOffset                 = globalOffsets[stag->neighbors[neighbor]];
619       const PetscInt *const nNeighbor                    = nNeighbors[neighbor];
620       const PetscInt        entriesPerElementRowNeighbor = nNeighbor[0] * stag->entriesPerElement;
621       for (j = 0; j < ghostOffsetEnd[1]; ++j) {
622         const PetscInt jghost = stag->nGhost[1] - ghostOffsetEnd[1] + j;
623         for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
624           const PetscInt i = nNeighbor[0] - ghostOffsetStart[0] + ighost;
625           for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
626             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
627             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
628           }
629         }
630       }
631     }
632 
633     /* Neighbor 7 (up) */
634     if (!dummyEnd[1]) {
635       /* We cannot be the last rank in y, though our neighbor may be
636        We may be the last rank in x, in which case our neighbor is also */
637       const PetscInt        neighbor                     = 7;
638       const PetscInt        globalOffset                 = globalOffsets[stag->neighbors[neighbor]];
639       const PetscInt *const nNeighbor                    = nNeighbors[neighbor];
640       const PetscInt        entriesPerElementRowNeighbor = entriesPerElementRow; /* same as here */
641       for (j = 0; j < ghostOffsetEnd[1]; ++j) {
642         const PetscInt jghost = stag->nGhost[1] - ghostOffsetEnd[1] + j;
643         for (ighost = ghostOffsetStart[0]; ighost < stag->nGhost[0] - ghostOffsetEnd[0]; ++ighost) {
644           const PetscInt i = ighost - ghostOffsetStart[0];
645           for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
646             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
647             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
648           }
649         }
650         if (dummyEnd[0]) {
651           const PetscInt ighost = stag->nGhost[0] - ghostOffsetEnd[0];
652           const PetscInt i      = nNeighbor[0];
653           for (d = 0; d < stag->dof[0]; ++d, ++count) { /* Vertex */
654             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
655             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
656           }
657           for (d = 0; d < stag->dof[1]; ++d, ++count) { /* Face */
658             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + stag->dof[0] + d;
659             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + stag->dof[0] + stag->dof[1] + d;
660           }
661         }
662       }
663     }
664 
665     /* Neighbor 8 (up right) */
666     if (!star && !dummyEnd[0] && !dummyEnd[1]) {
667       /* We can never be a ghosted boundary
668          Our neighbor may be a top boundary, a right boundary, or both */
669       const PetscInt        neighbor                     = 8;
670       const PetscInt        globalOffset                 = globalOffsets[stag->neighbors[neighbor]];
671       const PetscInt *const nNeighbor                    = nNeighbors[neighbor];
672       const PetscInt        entriesPerElementRowNeighbor = nNeighbor[0] * stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerFace : 0);
673       for (j = 0; j < ghostOffsetEnd[1]; ++j) {
674         const PetscInt jghost = stag->nGhost[1] - ghostOffsetEnd[1] + j;
675         for (i = 0; i < ghostOffsetEnd[0]; ++i) {
676           const PetscInt ighost = stag->nGhost[0] - ghostOffsetEnd[0] + i;
677           for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
678             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
679             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
680           }
681         }
682       }
683     }
684 
685     PetscCheck(count == entriesToTransferTotal, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of entries computed in gtol (%" PetscInt_FMT ") is not as expected (%" PetscInt_FMT ")", count, entriesToTransferTotal);
686 
687     /* Create Local and Global ISs (transferring pointer ownership) */
688     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)dm), entriesToTransferTotal, idxLocal, PETSC_OWN_POINTER, &isLocal));
689     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)dm), entriesToTransferTotal, idxGlobal, PETSC_OWN_POINTER, &isGlobal));
690 
691     /* Create stag->gtol. The order is computed as PETSc ordering, and doesn't include dummy entries */
692     {
693       Vec local, global;
694       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)dm), 1, stag->entries, PETSC_DECIDE, NULL, &global));
695       PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, PetscMax(stag->entriesPerElement, 1), stag->entriesGhost, NULL, &local));
696       PetscCall(VecScatterCreate(global, isGlobal, local, isLocal, &stag->gtol));
697       PetscCall(VecDestroy(&global));
698       PetscCall(VecDestroy(&local));
699     }
700 
701     /* Destroy ISs */
702     PetscCall(ISDestroy(&isLocal));
703     PetscCall(ISDestroy(&isGlobal));
704 
705     /* Next, we iterate over the local entries  again, in local order, recording the global entry to which each maps,
706        or -1 if there is none */
707     PetscCall(PetscMalloc1(stag->entriesGhost, &idxGlobalAll));
708 
709     countAll = 0;
710 
711     /* Loop over rows 1/3 : down */
712     if (!dummyStart[1]) {
713       for (jghost = 0; jghost < ghostOffsetStart[1]; ++jghost) {
714         /* Loop over columns 1/3 : down left */
715         if (!star && !dummyStart[0]) {
716           const PetscInt        neighbor     = 0;
717           const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
718           const PetscInt *const nNeighbor    = nNeighbors[neighbor];
719           const PetscInt j = nNeighbor[1] - ghostOffsetStart[1] + jghost; /* Note: this is actually the same value for the whole row of ranks below, so recomputing it for the next two ranks is redundant, and one could even get rid of jghost entirely if desired */
720           const PetscInt eprNeighbor = nNeighbor[0] * stag->entriesPerElement;
721           for (i = nNeighbor[0] - ghostOffsetStart[0]; i < nNeighbor[0]; ++i) {
722             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
723           }
724         } else {
725           /* Down Left dummies */
726           for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
727             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
728           }
729         }
730 
731         /* Loop over columns 2/3 : down middle */
732         {
733           const PetscInt        neighbor     = 1;
734           const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
735           const PetscInt *const nNeighbor    = nNeighbors[neighbor];
736           const PetscInt        j            = nNeighbor[1] - ghostOffsetStart[1] + jghost;
737           const PetscInt        eprNeighbor  = entriesPerElementRow; /* same as here */
738           for (i = 0; i < nNeighbor[0]; ++i) {
739             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
740           }
741         }
742 
743         /* Loop over columns 3/3 : down right */
744         if (!star && !dummyEnd[0]) {
745           const PetscInt        neighbor     = 2;
746           const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
747           const PetscInt *const nNeighbor    = nNeighbors[neighbor];
748           const PetscInt        j            = nNeighbor[1] - ghostOffsetStart[1] + jghost;
749           const PetscInt        eprNeighbor  = nNeighbor[0] * stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerFace : 0);
750           for (i = 0; i < ghostOffsetEnd[0]; ++i) {
751             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
752           }
753         } else if (dummyEnd[0]) {
754           /* Down right partial dummy elements, living on the *down* rank */
755           const PetscInt        neighbor     = 1;
756           const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
757           const PetscInt *const nNeighbor    = nNeighbors[neighbor];
758           const PetscInt        j            = nNeighbor[1] - ghostOffsetStart[1] + jghost;
759           const PetscInt        eprNeighbor  = entriesPerElementRow; /* same as here */
760           PetscInt              dGlobal;
761           i = nNeighbor[0];
762           for (d = 0, dGlobal = 0; d < stag->dof[0]; ++d, ++dGlobal, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + dGlobal;
763           for (; d < stag->dof[0] + stag->dof[1]; ++d, ++countAll) idxGlobalAll[countAll] = -1; /* dummy down face point */
764           for (; d < stag->dof[0] + 2 * stag->dof[1]; ++d, ++dGlobal, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + dGlobal;
765           for (; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1; /* dummy element point */
766           ++i;
767           for (; i < nNeighbor[0] + ghostOffsetEnd[0]; ++i) {
768             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
769           }
770         } else {
771           /* Down Right dummies */
772           for (ighost = 0; ighost < ghostOffsetEnd[0]; ++ighost) {
773             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
774           }
775         }
776       }
777     } else {
778       /* Down dummies row */
779       for (jghost = 0; jghost < ghostOffsetStart[1]; ++jghost) {
780         for (ighost = 0; ighost < stag->nGhost[0]; ++ighost) {
781           for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
782         }
783       }
784     }
785 
786     /* Loop over rows 2/3 : center */
787     for (j = 0; j < stag->n[1]; ++j) {
788       /* Loop over columns 1/3 : left */
789       if (!dummyStart[0]) {
790         const PetscInt        neighbor     = 3;
791         const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
792         const PetscInt *const nNeighbor    = nNeighbors[neighbor];
793         const PetscInt        eprNeighbor  = nNeighbor[0] * stag->entriesPerElement;
794         for (i = nNeighbor[0] - ghostOffsetStart[0]; i < nNeighbor[0]; ++i) {
795           for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
796         }
797       } else {
798         /* (Middle) Left dummies */
799         for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
800           for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
801         }
802       }
803 
804       /* Loop over columns 2/3 : here (the "neighbor" is ourselves, here) */
805       {
806         const PetscInt neighbor     = 4;
807         const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
808         const PetscInt eprNeighbor  = entriesPerElementRow; /* same as here (obviously) */
809         for (i = 0; i < stag->n[0]; ++i) {
810           for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
811         }
812       }
813 
814       /* Loop over columns 3/3 : right */
815       if (!dummyEnd[0]) {
816         const PetscInt        neighbor     = 5;
817         const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
818         const PetscInt *const nNeighbor    = nNeighbors[neighbor];
819         const PetscInt        eprNeighbor  = nNeighbor[0] * stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerFace : 0);
820         for (i = 0; i < ghostOffsetEnd[0]; ++i) {
821           for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
822         }
823       } else {
824         /* -1's for right layer of partial dummies, living on *this* rank */
825         const PetscInt        neighbor     = 4;
826         const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
827         const PetscInt *const nNeighbor    = nNeighbors[neighbor];
828         const PetscInt        eprNeighbor  = entriesPerElementRow; /* same as here (obviously) */
829         PetscInt              dGlobal;
830         i = nNeighbor[0];
831         for (d = 0, dGlobal = 0; d < stag->dof[0]; ++d, ++dGlobal, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + dGlobal;
832         for (; d < stag->dof[0] + stag->dof[1]; ++d, ++countAll) idxGlobalAll[countAll] = -1; /* dummy down face point */
833         for (; d < stag->dof[0] + 2 * stag->dof[1]; ++d, ++dGlobal, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + dGlobal;
834         for (; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1; /* dummy element point */
835         ++i;
836         for (; i < nNeighbor[0] + ghostOffsetEnd[0]; ++i) {
837           for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
838         }
839       }
840     }
841 
842     /* Loop over rows 3/3 : up */
843     if (!dummyEnd[1]) {
844       for (j = 0; j < ghostOffsetEnd[1]; ++j) {
845         /* Loop over columns 1/3 : up left */
846         if (!star && !dummyStart[0]) {
847           const PetscInt        neighbor     = 6;
848           const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
849           const PetscInt *const nNeighbor    = nNeighbors[neighbor];
850           const PetscInt        eprNeighbor  = nNeighbor[0] * stag->entriesPerElement;
851           for (i = nNeighbor[0] - ghostOffsetStart[0]; i < nNeighbor[0]; ++i) {
852             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
853           }
854         } else {
855           /* Up Left dummies */
856           for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
857             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
858           }
859         }
860 
861         /* Loop over columns 2/3 : up */
862         {
863           const PetscInt        neighbor     = 7;
864           const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
865           const PetscInt *const nNeighbor    = nNeighbors[neighbor];
866           const PetscInt        eprNeighbor  = entriesPerElementRow; /* Same as here */
867           for (i = 0; i < nNeighbor[0]; ++i) {
868             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
869           }
870         }
871 
872         /* Loop over columns 3/3 : up right */
873         if (!star && !dummyEnd[0]) {
874           const PetscInt        neighbor     = 8;
875           const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
876           const PetscInt *const nNeighbor    = nNeighbors[neighbor];
877           const PetscInt        eprNeighbor  = nNeighbor[0] * stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerFace : 0);
878           for (i = 0; i < ghostOffsetEnd[0]; ++i) {
879             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
880           }
881         } else if (dummyEnd[0]) {
882           /* -1's for right layer of partial dummies, living on rank above */
883           const PetscInt        neighbor     = 7;
884           const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
885           const PetscInt *const nNeighbor    = nNeighbors[neighbor];
886           const PetscInt        eprNeighbor  = entriesPerElementRow; /* Same as here */
887           PetscInt              dGlobal;
888           i = nNeighbor[0];
889           for (d = 0, dGlobal = 0; d < stag->dof[0]; ++d, ++dGlobal, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + dGlobal;
890           for (; d < stag->dof[0] + stag->dof[1]; ++d, ++countAll) idxGlobalAll[countAll] = -1; /* dummy down face point */
891           for (; d < stag->dof[0] + 2 * stag->dof[1]; ++d, ++dGlobal, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + dGlobal;
892           for (; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1; /* dummy element point */
893           ++i;
894           for (; i < nNeighbor[0] + ghostOffsetEnd[0]; ++i) {
895             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
896           }
897         } else {
898           /* Up Right dummies */
899           for (ighost = 0; ighost < ghostOffsetEnd[0]; ++ighost) {
900             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
901           }
902         }
903       }
904     } else {
905       j = stag->n[1];
906       /* Top layer of partial dummies */
907 
908       /* up left partial dummies layer : Loop over columns 1/3 : living on *left* neighbor */
909       if (!dummyStart[0]) {
910         const PetscInt        neighbor     = 3;
911         const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
912         const PetscInt *const nNeighbor    = nNeighbors[neighbor];
913         const PetscInt        eprNeighbor  = nNeighbor[0] * stag->entriesPerElement;
914         for (i = nNeighbor[0] - ghostOffsetStart[0]; i < nNeighbor[0]; ++i) {
915           for (d = 0; d < stag->dof[0] + stag->dof[1]; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * entriesPerFace + d; /* Note entriesPerFace here */
916           for (; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;                                                               /* dummy left face and element points */
917         }
918       } else {
919         for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
920           for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
921         }
922       }
923 
924       /* up partial dummies layer : Loop over columns 2/3 : living on *this* rank */
925       {
926         const PetscInt neighbor     = 4;
927         const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
928         const PetscInt eprNeighbor  = entriesPerElementRow; /* same as here (obviously) */
929         for (i = 0; i < stag->n[0]; ++i) {
930           for (d = 0; d < stag->dof[0] + stag->dof[1]; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * entriesPerFace + d; /* Note entriesPerFace here */
931           for (; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;                                                               /* dummy left face and element points */
932         }
933       }
934 
935       if (!dummyEnd[0]) {
936         /* up right partial dummies layer : Loop over columns 3/3 :  living on *right* neighbor */
937         const PetscInt        neighbor     = 5;
938         const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
939         const PetscInt *const nNeighbor    = nNeighbors[neighbor];
940         const PetscInt        eprNeighbor  = nNeighbor[0] * stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerFace : 0);
941         for (i = 0; i < ghostOffsetEnd[0]; ++i) {
942           for (d = 0; d < stag->dof[0] + stag->dof[1]; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * entriesPerFace + d; /* Note entriesPerFace here */
943           for (; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;                                                               /* dummy left face and element points */
944         }
945       } else {
946         /* Top partial dummies layer : Loop over columns 3/3 : right, living *here* */
947         const PetscInt neighbor     = 4;
948         const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
949         const PetscInt eprNeighbor  = entriesPerElementRow; /* same as here (obviously) */
950         i                           = stag->n[0];
951         for (d = 0; d < stag->dof[0]; ++d, ++countAll) {                                    /* Note just the vertex here */
952           idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * entriesPerFace + d; /* Note entriesPerFace here */
953         }
954         for (; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1; /* dummy bottom face, left face and element points */
955         ++i;
956         for (; i < stag->n[0] + ghostOffsetEnd[0]; ++i) {
957           for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
958         }
959       }
960       ++j;
961       /* Additional top dummy layers */
962       for (; j < stag->n[1] + ghostOffsetEnd[1]; ++j) {
963         for (ighost = 0; ighost < stag->nGhost[0]; ++ighost) {
964           for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
965         }
966       }
967     }
968 
969     /* Create local-to-global map (in local ordering, includes maps to -1 for dummy points) */
970     PetscCall(ISLocalToGlobalMappingCreate(comm, 1, stag->entriesGhost, idxGlobalAll, PETSC_OWN_POINTER, &dm->ltogmap));
971   }
972 
973   /* In special cases, create a dedicated injective local-to-global map */
974   if ((stag->boundaryType[0] == DM_BOUNDARY_PERIODIC && stag->nRanks[0] == 1) || (stag->boundaryType[1] == DM_BOUNDARY_PERIODIC && stag->nRanks[1] == 1)) PetscCall(DMStagPopulateLocalToGlobalInjective(dm));
975 
976   /* Free global offsets */
977   PetscCall(PetscFree(globalOffsets));
978 
979   /* Precompute location offsets */
980   PetscCall(DMStagComputeLocationOffsets_2d(dm));
981 
982   /* View from Options */
983   PetscCall(DMViewFromOptions(dm, NULL, "-dm_view"));
984   PetscFunctionReturn(PETSC_SUCCESS);
985 }
986 
987 /* adapted from da2.c */
DMStagSetUpBuildRankGrid_2d(DM dm)988 static PetscErrorCode DMStagSetUpBuildRankGrid_2d(DM dm)
989 {
990   DM_Stag *const stag = (DM_Stag *)dm->data;
991   PetscMPIInt    rank, size, m, n;
992   const PetscInt M = stag->N[0];
993   const PetscInt N = stag->N[1];
994 
995   PetscFunctionBegin;
996   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size));
997   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
998   m = stag->nRanks[0];
999   n = stag->nRanks[1];
1000   if (m != PETSC_DECIDE) {
1001     PetscCheck(m >= 1, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Non-positive number of ranks in X direction: %d", m);
1002     PetscCheck(m <= size, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Too many ranks in X direction: %d %d", m, size);
1003   }
1004   if (n != PETSC_DECIDE) {
1005     PetscCheck(n >= 1, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Non-positive number of ranks in Y direction: %d", n);
1006     PetscCheck(n <= size, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Too many ranks in Y direction: %d %d", n, size);
1007   }
1008   if (m == PETSC_DECIDE || n == PETSC_DECIDE) {
1009     if (n != PETSC_DECIDE) {
1010       m = size / n;
1011     } else if (m != PETSC_DECIDE) {
1012       n = size / m;
1013     } else {
1014       /* try for squarish distribution */
1015       m = (PetscMPIInt)(0.5 + PetscSqrtReal(((PetscReal)M) * ((PetscReal)size) / ((PetscReal)N)));
1016       if (!m) m = 1;
1017       while (m > 0) {
1018         n = size / m;
1019         if (m * n == size) break;
1020         m--;
1021       }
1022       if (M > N && m < n) {
1023         PetscMPIInt _m = m;
1024         m              = n;
1025         n              = _m;
1026       }
1027     }
1028     PetscCheck(m * n == size, PetscObjectComm((PetscObject)dm), PETSC_ERR_PLIB, "Unable to create partition, check the size of the communicator and input m and n ");
1029   } else PetscCheck(m * n == size, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Given Bad partition. Product of sizes (%d) does not equal communicator size (%d)", m * n, size);
1030   PetscCheck(M >= m, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Partition in x direction is too fine! %" PetscInt_FMT " %d", M, m);
1031   PetscCheck(N >= n, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Partition in y direction is too fine! %" PetscInt_FMT " %d", N, n);
1032   stag->nRanks[0] = m;
1033   stag->nRanks[1] = n;
1034   PetscFunctionReturn(PETSC_SUCCESS);
1035 }
1036 
DMStagSetUpBuildNeighbors_2d(DM dm)1037 static PetscErrorCode DMStagSetUpBuildNeighbors_2d(DM dm)
1038 {
1039   DM_Stag *const stag = (DM_Stag *)dm->data;
1040   PetscInt       d, i;
1041   PetscBool      per[2], first[2], last[2];
1042   PetscMPIInt    neighborRank[9][2], r[2], n[2];
1043   const PetscInt dim = 2;
1044 
1045   PetscFunctionBegin;
1046   for (d = 0; d < dim; ++d)
1047     PetscCheck(stag->boundaryType[d] == DM_BOUNDARY_NONE || stag->boundaryType[d] == DM_BOUNDARY_PERIODIC || stag->boundaryType[d] == DM_BOUNDARY_GHOSTED, PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Neighbor determination not implemented for %s",
1048                DMBoundaryTypes[stag->boundaryType[d]]);
1049 
1050   /* Assemble some convenience variables */
1051   for (d = 0; d < dim; ++d) {
1052     per[d]   = (PetscBool)(stag->boundaryType[d] == DM_BOUNDARY_PERIODIC);
1053     first[d] = stag->firstRank[d];
1054     last[d]  = stag->lastRank[d];
1055     r[d]     = stag->rank[d];
1056     n[d]     = stag->nRanks[d];
1057   }
1058 
1059   /* First, compute the position in the rank grid for all neighbors */
1060   neighborRank[0][0] = first[0] ? (per[0] ? n[0] - 1 : -1) : r[0] - 1; /* left  down */
1061   neighborRank[0][1] = first[1] ? (per[1] ? n[1] - 1 : -1) : r[1] - 1;
1062 
1063   neighborRank[1][0] = r[0]; /*       down */
1064   neighborRank[1][1] = first[1] ? (per[1] ? n[1] - 1 : -1) : r[1] - 1;
1065 
1066   neighborRank[2][0] = last[0] ? (per[0] ? 0 : -1) : r[0] + 1; /* right down */
1067   neighborRank[2][1] = first[1] ? (per[1] ? n[1] - 1 : -1) : r[1] - 1;
1068 
1069   neighborRank[3][0] = first[0] ? (per[0] ? n[0] - 1 : -1) : r[0] - 1; /* left       */
1070   neighborRank[3][1] = r[1];
1071 
1072   neighborRank[4][0] = r[0];
1073   neighborRank[4][1] = r[1];
1074 
1075   neighborRank[5][0] = last[0] ? (per[0] ? 0 : -1) : r[0] + 1; /* right      */
1076   neighborRank[5][1] = r[1];
1077 
1078   neighborRank[6][0] = first[0] ? (per[0] ? n[0] - 1 : -1) : r[0] - 1; /* left  up   */
1079   neighborRank[6][1] = last[1] ? (per[1] ? 0 : -1) : r[1] + 1;
1080 
1081   neighborRank[7][0] = r[0]; /*       up   */
1082   neighborRank[7][1] = last[1] ? (per[1] ? 0 : -1) : r[1] + 1;
1083 
1084   neighborRank[8][0] = last[0] ? (per[0] ? 0 : -1) : r[0] + 1; /* right up   */
1085   neighborRank[8][1] = last[1] ? (per[1] ? 0 : -1) : r[1] + 1;
1086 
1087   /* Then, compute the rank of each in the linear ordering */
1088   PetscCall(PetscMalloc1(9, &stag->neighbors));
1089   for (i = 0; i < 9; ++i) {
1090     if (neighborRank[i][0] >= 0 && neighborRank[i][1] >= 0) {
1091       stag->neighbors[i] = neighborRank[i][0] + n[0] * neighborRank[i][1];
1092     } else {
1093       stag->neighbors[i] = -1;
1094     }
1095   }
1096   PetscFunctionReturn(PETSC_SUCCESS);
1097 }
1098 
DMStagSetUpBuildGlobalOffsets_2d(DM dm,PetscInt ** pGlobalOffsets)1099 static PetscErrorCode DMStagSetUpBuildGlobalOffsets_2d(DM dm, PetscInt **pGlobalOffsets)
1100 {
1101   const DM_Stag *const stag = (DM_Stag *)dm->data;
1102   PetscInt            *globalOffsets;
1103   PetscInt             i, j, d, entriesPerFace, count;
1104   PetscMPIInt          size;
1105   PetscBool            extra[2];
1106 
1107   PetscFunctionBegin;
1108   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size));
1109   for (d = 0; d < 2; ++d) extra[d] = (PetscBool)(stag->boundaryType[d] != DM_BOUNDARY_PERIODIC); /* Extra points in global rep */
1110   entriesPerFace = stag->dof[0] + stag->dof[1];
1111   PetscCall(PetscMalloc1(size, pGlobalOffsets));
1112   globalOffsets    = *pGlobalOffsets;
1113   globalOffsets[0] = 0;
1114   count            = 1; /* note the count is offset by 1 here. We add the size of the previous rank */
1115   for (j = 0; j < stag->nRanks[1] - 1; ++j) {
1116     const PetscInt nnj = stag->l[1][j];
1117     for (i = 0; i < stag->nRanks[0] - 1; ++i) {
1118       const PetscInt nni   = stag->l[0][i];
1119       globalOffsets[count] = globalOffsets[count - 1] + nnj * nni * stag->entriesPerElement; /* No right/top/front boundaries */
1120       ++count;
1121     }
1122     {
1123       /* i = stag->nRanks[0]-1; */
1124       const PetscInt nni   = stag->l[0][i];
1125       globalOffsets[count] = globalOffsets[count - 1] + nnj * nni * stag->entriesPerElement + (extra[0] ? nnj * entriesPerFace : 0); /* Extra faces on the right */
1126       ++count;
1127     }
1128   }
1129   {
1130     /* j = stag->nRanks[1]-1; */
1131     const PetscInt nnj = stag->l[1][j];
1132     for (i = 0; i < stag->nRanks[0] - 1; ++i) {
1133       const PetscInt nni   = stag->l[0][i];
1134       globalOffsets[count] = globalOffsets[count - 1] + nni * nnj * stag->entriesPerElement + (extra[1] ? nni * entriesPerFace : 0); /* Extra faces on the top */
1135       ++count;
1136     }
1137     /* Don't need to compute entries in last element */
1138   }
1139   PetscFunctionReturn(PETSC_SUCCESS);
1140 }
1141 
DMStagComputeLocationOffsets_2d(DM dm)1142 static PetscErrorCode DMStagComputeLocationOffsets_2d(DM dm)
1143 {
1144   DM_Stag *const stag = (DM_Stag *)dm->data;
1145   const PetscInt epe  = stag->entriesPerElement;
1146   const PetscInt epr  = stag->nGhost[0] * epe;
1147 
1148   PetscFunctionBegin;
1149   PetscCall(PetscMalloc1(DMSTAG_NUMBER_LOCATIONS, &stag->locationOffsets));
1150   stag->locationOffsets[DMSTAG_DOWN_LEFT]  = 0;
1151   stag->locationOffsets[DMSTAG_DOWN]       = stag->locationOffsets[DMSTAG_DOWN_LEFT] + stag->dof[0];
1152   stag->locationOffsets[DMSTAG_DOWN_RIGHT] = stag->locationOffsets[DMSTAG_DOWN_LEFT] + epe;
1153   stag->locationOffsets[DMSTAG_LEFT]       = stag->locationOffsets[DMSTAG_DOWN] + stag->dof[1];
1154   stag->locationOffsets[DMSTAG_ELEMENT]    = stag->locationOffsets[DMSTAG_LEFT] + stag->dof[1];
1155   stag->locationOffsets[DMSTAG_RIGHT]      = stag->locationOffsets[DMSTAG_LEFT] + epe;
1156   stag->locationOffsets[DMSTAG_UP_LEFT]    = stag->locationOffsets[DMSTAG_DOWN_LEFT] + epr;
1157   stag->locationOffsets[DMSTAG_UP]         = stag->locationOffsets[DMSTAG_DOWN] + epr;
1158   stag->locationOffsets[DMSTAG_UP_RIGHT]   = stag->locationOffsets[DMSTAG_UP_LEFT] + epe;
1159   PetscFunctionReturn(PETSC_SUCCESS);
1160 }
1161 
DMStagPopulateLocalToGlobalInjective_2d(DM dm)1162 PETSC_INTERN PetscErrorCode DMStagPopulateLocalToGlobalInjective_2d(DM dm)
1163 {
1164   DM_Stag *const  stag = (DM_Stag *)dm->data;
1165   PetscInt       *idxLocal, *idxGlobal, *globalOffsetsRecomputed;
1166   const PetscInt *globalOffsets;
1167   PetscInt        i, j, d, count, entriesPerCorner, entriesPerFace, entriesPerElementRowGhost, entriesPerElementRow, ghostOffsetStart[2];
1168   IS              isLocal, isGlobal;
1169   PetscBool       dummyEnd[2];
1170 
1171   PetscFunctionBegin;
1172   PetscCall(DMStagSetUpBuildGlobalOffsets_2d(dm, &globalOffsetsRecomputed)); /* note that we don't actually use all of these. An available optimization is to pass them, when available */
1173   globalOffsets = globalOffsetsRecomputed;
1174   PetscCall(PetscMalloc1(stag->entries, &idxLocal));
1175   PetscCall(PetscMalloc1(stag->entries, &idxGlobal));
1176   for (d = 0; d < 2; ++d) dummyEnd[d] = (PetscBool)(stag->lastRank[d] && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);
1177   entriesPerCorner          = stag->dof[0];
1178   entriesPerFace            = stag->dof[0] + stag->dof[1];
1179   entriesPerElementRow      = stag->n[0] * stag->entriesPerElement + (dummyEnd[0] ? entriesPerFace : 0);
1180   entriesPerElementRowGhost = stag->nGhost[0] * stag->entriesPerElement;
1181   count                     = 0;
1182   for (d = 0; d < 2; ++d) ghostOffsetStart[d] = stag->start[d] - stag->startGhost[d];
1183   {
1184     const PetscInt neighbor     = 4;
1185     const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
1186     for (j = 0; j < stag->n[1]; ++j) {
1187       const PetscInt jghost = j + ghostOffsetStart[1];
1188       for (i = 0; i < stag->n[0]; ++i) {
1189         const PetscInt ighost = i + ghostOffsetStart[0];
1190         for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
1191           idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * stag->entriesPerElement + d;
1192           idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
1193         }
1194       }
1195       if (dummyEnd[0]) {
1196         const PetscInt ighost = i + ghostOffsetStart[0];
1197         i                     = stag->n[0];
1198         for (d = 0; d < stag->dof[0]; ++d, ++count) { /* vertex first */
1199           idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * stag->entriesPerElement + d;
1200           idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
1201         }
1202         for (d = 0; d < stag->dof[1]; ++d, ++count) { /* then left edge (skipping bottom face) */
1203           idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * stag->entriesPerElement + stag->dof[0] + d;
1204           idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + stag->dof[0] + stag->dof[1] + d;
1205         }
1206       }
1207     }
1208     if (dummyEnd[1]) {
1209       const PetscInt jghost = j + ghostOffsetStart[1];
1210       j                     = stag->n[1];
1211       for (i = 0; i < stag->n[0]; ++i) {
1212         const PetscInt ighost = i + ghostOffsetStart[0];
1213         for (d = 0; d < entriesPerFace; ++d, ++count) {                                        /* vertex and bottom face (which are the first entries) */
1214           idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * entriesPerFace + d; /* note i increment by entriesPerFace */
1215           idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
1216         }
1217       }
1218       if (dummyEnd[0]) {
1219         const PetscInt ighost = i + ghostOffsetStart[0];
1220         i                     = stag->n[0];
1221         for (d = 0; d < entriesPerCorner; ++d, ++count) {                                      /* vertex only */
1222           idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * entriesPerFace + d; /* note i increment by entriesPerFace */
1223           idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
1224         }
1225       }
1226     }
1227   }
1228   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)dm), stag->entries, idxLocal, PETSC_OWN_POINTER, &isLocal));
1229   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)dm), stag->entries, idxGlobal, PETSC_OWN_POINTER, &isGlobal));
1230   {
1231     Vec local, global;
1232     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)dm), 1, stag->entries, PETSC_DECIDE, NULL, &global));
1233     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, PetscMax(stag->entriesPerElement, 1), stag->entriesGhost, NULL, &local));
1234     PetscCall(VecScatterCreate(local, isLocal, global, isGlobal, &stag->ltog_injective));
1235     PetscCall(VecDestroy(&global));
1236     PetscCall(VecDestroy(&local));
1237   }
1238   PetscCall(ISDestroy(&isLocal));
1239   PetscCall(ISDestroy(&isGlobal));
1240   if (globalOffsetsRecomputed) PetscCall(PetscFree(globalOffsetsRecomputed));
1241   PetscFunctionReturn(PETSC_SUCCESS);
1242 }
1243 
DMStagPopulateLocalToLocal2d_Internal(DM dm)1244 PETSC_INTERN PetscErrorCode DMStagPopulateLocalToLocal2d_Internal(DM dm)
1245 {
1246   DM_Stag *const stag = (DM_Stag *)dm->data;
1247   PetscInt      *idxRemap;
1248   PetscBool      dummyEnd[2];
1249   PetscInt       i, j, d, count, leftGhostElements, downGhostElements, entriesPerRowGhost, iOffset, jOffset;
1250   PetscInt       dOffset[4] = {0};
1251 
1252   PetscFunctionBegin;
1253   PetscCall(VecScatterCopy(stag->gtol, &stag->ltol));
1254   PetscCall(PetscMalloc1(stag->entries, &idxRemap));
1255 
1256   for (d = 0; d < 2; ++d) dummyEnd[d] = (PetscBool)(stag->lastRank[d] && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);
1257   leftGhostElements  = stag->start[0] - stag->startGhost[0];
1258   downGhostElements  = stag->start[1] - stag->startGhost[1];
1259   entriesPerRowGhost = stag->nGhost[0] * stag->entriesPerElement;
1260   dOffset[1]         = dOffset[0] + stag->dof[0];
1261   dOffset[2]         = dOffset[1] + stag->dof[1];
1262   dOffset[3]         = dOffset[2] + stag->dof[1];
1263 
1264   count = 0;
1265   for (j = 0; j < stag->n[1]; ++j) {
1266     jOffset = entriesPerRowGhost * (downGhostElements + j);
1267     for (i = 0; i < stag->n[0]; ++i) {
1268       iOffset = stag->entriesPerElement * (leftGhostElements + i);
1269       // all
1270       for (d = 0; d < stag->entriesPerElement; ++d) idxRemap[count++] = jOffset + iOffset + d;
1271     }
1272     if (dummyEnd[0]) {
1273       iOffset = stag->entriesPerElement * (leftGhostElements + stag->n[0]);
1274       // down left, left
1275       for (d = 0; d < stag->dof[0]; ++d) idxRemap[count++] = jOffset + iOffset + dOffset[0] + d;
1276       for (d = 0; d < stag->dof[1]; ++d) idxRemap[count++] = jOffset + iOffset + dOffset[2] + d;
1277     }
1278   }
1279   if (dummyEnd[1]) {
1280     jOffset = entriesPerRowGhost * (downGhostElements + stag->n[1]);
1281     for (i = 0; i < stag->n[0]; ++i) {
1282       iOffset = stag->entriesPerElement * (leftGhostElements + i);
1283       // down left, down
1284       for (d = 0; d < stag->dof[0]; ++d) idxRemap[count++] = jOffset + iOffset + dOffset[0] + d;
1285       for (d = 0; d < stag->dof[1]; ++d) idxRemap[count++] = jOffset + iOffset + dOffset[1] + d;
1286     }
1287     if (dummyEnd[0]) {
1288       iOffset = stag->entriesPerElement * (leftGhostElements + stag->n[0]);
1289       // down left
1290       for (d = 0; d < stag->dof[0]; ++d) idxRemap[count++] = jOffset + iOffset + dOffset[0] + d;
1291     }
1292   }
1293 
1294   PetscCheck(count == stag->entries, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of entries computed in ltol (%" PetscInt_FMT ") is not as expected (%" PetscInt_FMT ")", count, stag->entries);
1295 
1296   PetscCall(VecScatterRemap(stag->ltol, idxRemap, NULL));
1297   PetscCall(PetscFree(idxRemap));
1298   PetscFunctionReturn(PETSC_SUCCESS);
1299 }
1300 
DMCreateMatrix_Stag_2D_AIJ_Assemble(DM dm,Mat A)1301 PETSC_INTERN PetscErrorCode DMCreateMatrix_Stag_2D_AIJ_Assemble(DM dm, Mat A)
1302 {
1303   PetscInt          entries, dof[DMSTAG_MAX_STRATA], epe, stencil_width, N[2], start[2], n[2], n_extra[2];
1304   DMStagStencilType stencil_type;
1305   DMBoundaryType    boundary_type[2];
1306 
1307   PetscFunctionBegin;
1308   PetscCall(DMStagGetDOF(dm, &dof[0], &dof[1], &dof[2], NULL));
1309   PetscCall(DMStagGetStencilType(dm, &stencil_type));
1310   PetscCall(DMStagGetStencilWidth(dm, &stencil_width));
1311   PetscCall(DMStagGetEntries(dm, &entries));
1312   PetscCall(DMStagGetEntriesPerElement(dm, &epe));
1313   PetscCall(DMStagGetCorners(dm, &start[0], &start[1], NULL, &n[0], &n[1], NULL, &n_extra[0], &n_extra[1], NULL));
1314   PetscCall(DMStagGetGlobalSizes(dm, &N[0], &N[1], NULL));
1315   PetscCall(DMStagGetBoundaryTypes(dm, &boundary_type[0], &boundary_type[1], NULL));
1316 
1317   if (stencil_type == DMSTAG_STENCIL_NONE) {
1318     /* Couple all DOF at each location to each other */
1319     DMStagStencil *row_vertex, *row_face_down, *row_face_left, *row_element;
1320 
1321     PetscCall(PetscMalloc1(dof[0], &row_vertex));
1322     for (PetscInt c = 0; c < dof[0]; ++c) {
1323       row_vertex[c].loc = DMSTAG_DOWN_LEFT;
1324       row_vertex[c].c   = c;
1325     }
1326 
1327     PetscCall(PetscMalloc1(dof[1], &row_face_down));
1328     for (PetscInt c = 0; c < dof[1]; ++c) {
1329       row_face_down[c].loc = DMSTAG_DOWN;
1330       row_face_down[c].c   = c;
1331     }
1332 
1333     PetscCall(PetscMalloc1(dof[1], &row_face_left));
1334     for (PetscInt c = 0; c < dof[1]; ++c) {
1335       row_face_left[c].loc = DMSTAG_LEFT;
1336       row_face_left[c].c   = c;
1337     }
1338 
1339     PetscCall(PetscMalloc1(dof[2], &row_element));
1340     for (PetscInt c = 0; c < dof[2]; ++c) {
1341       row_element[c].loc = DMSTAG_ELEMENT;
1342       row_element[c].c   = c;
1343     }
1344 
1345     for (PetscInt ey = start[1]; ey < start[1] + n[1] + n_extra[1]; ++ey) {
1346       for (PetscInt ex = start[0]; ex < start[0] + n[0] + n_extra[0]; ++ex) {
1347         {
1348           for (PetscInt c = 0; c < dof[0]; ++c) {
1349             row_vertex[c].i = ex;
1350             row_vertex[c].j = ey;
1351           }
1352           PetscCall(DMStagMatSetValuesStencil(dm, A, dof[0], row_vertex, dof[0], row_vertex, NULL, INSERT_VALUES));
1353         }
1354         if (ex < N[0]) {
1355           for (PetscInt c = 0; c < dof[1]; ++c) {
1356             row_face_down[c].i = ex;
1357             row_face_down[c].j = ey;
1358           }
1359           PetscCall(DMStagMatSetValuesStencil(dm, A, dof[1], row_face_down, dof[1], row_face_down, NULL, INSERT_VALUES));
1360         }
1361         if (ey < N[1]) {
1362           for (PetscInt c = 0; c < dof[1]; ++c) {
1363             row_face_left[c].i = ex;
1364             row_face_left[c].j = ey;
1365           }
1366           PetscCall(DMStagMatSetValuesStencil(dm, A, dof[1], row_face_left, dof[1], row_face_left, NULL, INSERT_VALUES));
1367         }
1368         if (ex < N[0] && ey < N[1]) {
1369           for (PetscInt c = 0; c < dof[2]; ++c) {
1370             row_element[c].i = ex;
1371             row_element[c].j = ey;
1372           }
1373           PetscCall(DMStagMatSetValuesStencil(dm, A, dof[2], row_element, dof[2], row_element, NULL, INSERT_VALUES));
1374         }
1375       }
1376     }
1377     PetscCall(PetscFree(row_vertex));
1378     PetscCall(PetscFree(row_face_left));
1379     PetscCall(PetscFree(row_face_down));
1380     PetscCall(PetscFree(row_element));
1381   } else if (stencil_type == DMSTAG_STENCIL_STAR || stencil_type == DMSTAG_STENCIL_BOX) {
1382     DMStagStencil *col, *row;
1383 
1384     PetscCall(PetscMalloc1(epe, &row));
1385     {
1386       PetscInt nrows = 0;
1387 
1388       for (PetscInt c = 0; c < dof[0]; ++c) {
1389         row[nrows].c   = c;
1390         row[nrows].loc = DMSTAG_DOWN_LEFT;
1391         ++nrows;
1392       }
1393       for (PetscInt c = 0; c < dof[1]; ++c) {
1394         row[nrows].c   = c;
1395         row[nrows].loc = DMSTAG_LEFT;
1396         ++nrows;
1397       }
1398       for (PetscInt c = 0; c < dof[1]; ++c) {
1399         row[nrows].c   = c;
1400         row[nrows].loc = DMSTAG_DOWN;
1401         ++nrows;
1402       }
1403       for (PetscInt c = 0; c < dof[2]; ++c) {
1404         row[nrows].c   = c;
1405         row[nrows].loc = DMSTAG_ELEMENT;
1406         ++nrows;
1407       }
1408     }
1409 
1410     PetscCall(PetscMalloc1(epe, &col));
1411     {
1412       PetscInt ncols = 0;
1413 
1414       for (PetscInt c = 0; c < dof[0]; ++c) {
1415         col[ncols].c   = c;
1416         col[ncols].loc = DMSTAG_DOWN_LEFT;
1417         ++ncols;
1418       }
1419       for (PetscInt c = 0; c < dof[1]; ++c) {
1420         col[ncols].c   = c;
1421         col[ncols].loc = DMSTAG_LEFT;
1422         ++ncols;
1423       }
1424       for (PetscInt c = 0; c < dof[1]; ++c) {
1425         col[ncols].c   = c;
1426         col[ncols].loc = DMSTAG_DOWN;
1427         ++ncols;
1428       }
1429       for (PetscInt c = 0; c < dof[2]; ++c) {
1430         col[ncols].c   = c;
1431         col[ncols].loc = DMSTAG_ELEMENT;
1432         ++ncols;
1433       }
1434     }
1435 
1436     for (PetscInt ey = start[1]; ey < start[1] + n[1] + n_extra[1]; ++ey) {
1437       for (PetscInt ex = start[0]; ex < start[0] + n[0] + n_extra[0]; ++ex) {
1438         for (PetscInt i = 0; i < epe; ++i) {
1439           row[i].i = ex;
1440           row[i].j = ey;
1441         }
1442         for (PetscInt offset_y = -stencil_width; offset_y <= stencil_width; ++offset_y) {
1443           const PetscInt ey_offset = ey + offset_y;
1444           for (PetscInt offset_x = -stencil_width; offset_x <= stencil_width; ++offset_x) {
1445             const PetscInt ex_offset = ex + offset_x;
1446             /* Only set values corresponding to elements which can have non-dummy entries,
1447                meaning those that map to unknowns in the global representation. In the periodic
1448                case, this is the entire stencil, but in all other cases, only includes a single
1449                "extra" element which is partially outside the physical domain (those points in the
1450                global representation */
1451             if ((stencil_type == DMSTAG_STENCIL_BOX || offset_x == 0 || offset_y == 0) && (boundary_type[0] == DM_BOUNDARY_PERIODIC || (ex_offset < N[0] + 1 && ex_offset >= 0)) && (boundary_type[1] == DM_BOUNDARY_PERIODIC || (ey_offset < N[1] + 1 && ey_offset >= 0))) {
1452               for (PetscInt i = 0; i < epe; ++i) {
1453                 col[i].i = ex_offset;
1454                 col[i].j = ey_offset;
1455               }
1456               PetscCall(DMStagMatSetValuesStencil(dm, A, epe, row, epe, col, NULL, INSERT_VALUES));
1457             }
1458           }
1459         }
1460       }
1461     }
1462     PetscCall(PetscFree(row));
1463     PetscCall(PetscFree(col));
1464   } else SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Unsupported stencil type %s", DMStagStencilTypes[stencil_type]);
1465   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1466   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1467   PetscFunctionReturn(PETSC_SUCCESS);
1468 }
1469