xref: /petsc/src/dm/impls/stag/stag3d.c (revision 4f037aad63b0dd02ec302d96e63d16a2808e7a1f)
1 /* Functions specific to the 3-dimensional implementation of DMStag */
2 #include <petsc/private/dmstagimpl.h> /*I  "petscdmstag.h"   I*/
3 
4 /*@
5   DMStagCreate3d - Create an object to manage data living on the elements, faces, edges, and vertices of a parallelized regular 3D grid.
6 
7   Collective
8 
9   Input Parameters:
10 + comm         - MPI communicator
11 . bndx         - x boundary type, `DM_BOUNDARY_NONE`, `DM_BOUNDARY_PERIODIC`, or `DM_BOUNDARY_GHOSTED`
12 . bndy         - y boundary type, `DM_BOUNDARY_NONE`, `DM_BOUNDARY_PERIODIC`, or `DM_BOUNDARY_GHOSTED`
13 . bndz         - z boundary type, `DM_BOUNDARY_NONE`, `DM_BOUNDARY_PERIODIC`, or `DM_BOUNDARY_GHOSTED`
14 . M            - global number of elements in x direction
15 . N            - global number of elements in y direction
16 . P            - global number of elements in z direction
17 . m            - number of ranks in the x direction (may be `PETSC_DECIDE`)
18 . n            - number of ranks in the y direction (may be `PETSC_DECIDE`)
19 . p            - number of ranks in the z direction (may be `PETSC_DECIDE`)
20 . dof0         - number of degrees of freedom per vertex/0-cell
21 . dof1         - number of degrees of freedom per edge/1-cell
22 . dof2         - number of degrees of freedom per face/2-cell
23 . dof3         - number of degrees of freedom per element/3-cell
24 . stencilType  - ghost/halo region type: `DMSTAG_STENCIL_NONE`, `DMSTAG_STENCIL_BOX`, or `DMSTAG_STENCIL_STAR`
25 . stencilWidth - width, in elements, of halo/ghost region
26 . lx           - array of local x  element counts, of length equal to `m`, summing to `M`, or `NULL`
27 . ly           - arrays of local y element counts, of length equal to `n`, summing to `N`, or `NULL`
28 - lz           - arrays of local z element counts, of length equal to `p`, summing to `P`, or `NULL`
29 
30   Output Parameter:
31 . dm - the new `DMSTAG` object
32 
33   Options Database Keys:
34 + -dm_view                                      - calls `DMViewFromOptions()` at the conclusion of `DMSetUp()`
35 . -stag_grid_x <nx>                             - number of elements in the x direction
36 . -stag_grid_y <ny>                             - number of elements in the y direction
37 . -stag_grid_z <nz>                             - number of elements in the z direction
38 . -stag_ranks_x <rx>                            - number of ranks in the x direction
39 . -stag_ranks_y <ry>                            - number of ranks in the y direction
40 . -stag_ranks_z <rz>                            - number of ranks in the z direction
41 . -stag_ghost_stencil_width                     - width of ghost region, in elements
42 . -stag_boundary_type x <none,ghosted,periodic> - `DMBoundaryType` value
43 . -stag_boundary_type y <none,ghosted,periodic> - `DMBoundaryType` value
44 - -stag_boundary_type z <none,ghosted,periodic> - `DMBoundaryType` value
45 
46   Level: beginner
47 
48   Notes:
49   You must call `DMSetUp()` after this call before using the `DM`.
50   If you wish to use the options database (see the keys above) to change values in the `DMSTAG`, you must call
51   `DMSetFromOptions()` after this function but before `DMSetUp()`.
52 
53 .seealso: [](ch_stag), `DMSTAG`, `DMStagCreate1d()`, `DMStagCreate2d()`, `DMDestroy()`, `DMView()`, `DMCreateGlobalVector()`, `DMCreateLocalVector()`, `DMLocalToGlobalBegin()`, `DMDACreate3d()`
54 @*/
DMStagCreate3d(MPI_Comm comm,DMBoundaryType bndx,DMBoundaryType bndy,DMBoundaryType bndz,PetscInt M,PetscInt N,PetscInt P,PetscInt m,PetscInt n,PetscInt p,PetscInt dof0,PetscInt dof1,PetscInt dof2,PetscInt dof3,DMStagStencilType stencilType,PetscInt stencilWidth,const PetscInt lx[],const PetscInt ly[],const PetscInt lz[],DM * dm)55 PetscErrorCode DMStagCreate3d(MPI_Comm comm, DMBoundaryType bndx, DMBoundaryType bndy, DMBoundaryType bndz, PetscInt M, PetscInt N, PetscInt P, PetscInt m, PetscInt n, PetscInt p, PetscInt dof0, PetscInt dof1, PetscInt dof2, PetscInt dof3, DMStagStencilType stencilType, PetscInt stencilWidth, const PetscInt lx[], const PetscInt ly[], const PetscInt lz[], DM *dm)
56 {
57   PetscFunctionBegin;
58   PetscCall(DMCreate(comm, dm));
59   PetscCall(DMSetDimension(*dm, 3));
60   PetscCall(DMStagInitialize(bndx, bndy, bndz, M, N, P, m, n, p, dof0, dof1, dof2, dof3, stencilType, stencilWidth, lx, ly, lz, *dm));
61   PetscFunctionReturn(PETSC_SUCCESS);
62 }
63 
DMStagRestrictSimple_3d(DM dmf,Vec xf_local,DM dmc,Vec xc_local)64 PETSC_INTERN PetscErrorCode DMStagRestrictSimple_3d(DM dmf, Vec xf_local, DM dmc, Vec xc_local)
65 {
66   PetscInt              Mf, Nf, Pf, Mc, Nc, Pc, factorx, factory, factorz, dof[4];
67   PetscInt              xc, yc, zc, mc, nc, pc, nExtraxc, nExtrayc, nExtrazc, i, j, k, d;
68   PetscInt              ibackdownleftf, ibackdownf, ibackleftf, ibackf, idownleftf, idownf, ileftf, ielemf;
69   PetscInt              ibackdownleftc, ibackdownc, ibackleftc, ibackc, idownleftc, idownc, ileftc, ielemc;
70   const PetscScalar ****arrf;
71   PetscScalar       ****arrc;
72 
73   PetscFunctionBegin;
74   PetscCall(DMStagGetGlobalSizes(dmf, &Mf, &Nf, &Pf));
75   PetscCall(DMStagGetGlobalSizes(dmc, &Mc, &Nc, &Pc));
76   factorx = Mf / Mc;
77   factory = Nf / Nc;
78   factorz = Pf / Pc;
79   PetscCall(DMStagGetDOF(dmc, &dof[0], &dof[1], &dof[2], &dof[3]));
80 
81   PetscCall(DMStagGetCorners(dmc, &xc, &yc, &zc, &mc, &nc, &pc, &nExtraxc, &nExtrayc, &nExtrazc));
82   PetscCall(VecZeroEntries(xc_local));
83   PetscCall(DMStagVecGetArray(dmf, xf_local, &arrf));
84   PetscCall(DMStagVecGetArray(dmc, xc_local, &arrc));
85   PetscCall(DMStagGetLocationSlot(dmf, DMSTAG_BACK_DOWN_LEFT, 0, &ibackdownleftf));
86   PetscCall(DMStagGetLocationSlot(dmf, DMSTAG_BACK_DOWN, 0, &ibackdownf));
87   PetscCall(DMStagGetLocationSlot(dmf, DMSTAG_BACK_LEFT, 0, &ibackleftf));
88   PetscCall(DMStagGetLocationSlot(dmf, DMSTAG_BACK, 0, &ibackf));
89   PetscCall(DMStagGetLocationSlot(dmf, DMSTAG_DOWN_LEFT, 0, &idownleftf));
90   PetscCall(DMStagGetLocationSlot(dmf, DMSTAG_DOWN, 0, &idownf));
91   PetscCall(DMStagGetLocationSlot(dmf, DMSTAG_LEFT, 0, &ileftf));
92   PetscCall(DMStagGetLocationSlot(dmf, DMSTAG_ELEMENT, 0, &ielemf));
93   PetscCall(DMStagGetLocationSlot(dmc, DMSTAG_BACK_DOWN_LEFT, 0, &ibackdownleftc));
94   PetscCall(DMStagGetLocationSlot(dmc, DMSTAG_BACK_DOWN, 0, &ibackdownc));
95   PetscCall(DMStagGetLocationSlot(dmc, DMSTAG_BACK_LEFT, 0, &ibackleftc));
96   PetscCall(DMStagGetLocationSlot(dmc, DMSTAG_BACK, 0, &ibackc));
97   PetscCall(DMStagGetLocationSlot(dmc, DMSTAG_DOWN_LEFT, 0, &idownleftc));
98   PetscCall(DMStagGetLocationSlot(dmc, DMSTAG_DOWN, 0, &idownc));
99   PetscCall(DMStagGetLocationSlot(dmc, DMSTAG_LEFT, 0, &ileftc));
100   PetscCall(DMStagGetLocationSlot(dmc, DMSTAG_ELEMENT, 0, &ielemc));
101 
102   for (d = 0; d < dof[0]; ++d)
103     for (k = zc; k < zc + pc + nExtrazc; ++k)
104       for (j = yc; j < yc + nc + nExtrayc; ++j)
105         for (i = xc; i < xc + mc + nExtraxc; ++i) {
106           const PetscInt ii = i * factorx, jj = j * factory, kk = k * factorz;
107 
108           arrc[k][j][i][ibackdownleftc + d] = arrf[kk][jj][ii][ibackdownleftf + d];
109         }
110 
111   for (d = 0; d < dof[1]; ++d)
112     for (k = zc; k < zc + pc + nExtrazc; ++k)
113       for (j = yc; j < yc + nc + nExtrayc; ++j)
114         for (i = xc; i < xc + mc; ++i) {
115           const PetscInt ii = i * factorx + factorx / 2, jj = j * factory, kk = k * factorz;
116 
117           if (factorx % 2 == 0) arrc[k][j][i][ibackdownc + d] = 0.5 * (arrf[kk][jj][ii - 1][ibackdownf + d] + arrf[kk][jj][ii][ibackdownf + d]);
118           else arrc[k][j][i][ibackdownc + d] = arrf[kk][jj][ii][ibackdownf + d];
119         }
120 
121   for (d = 0; d < dof[1]; ++d)
122     for (k = zc; k < zc + pc + nExtrazc; ++k)
123       for (j = yc; j < yc + nc; ++j)
124         for (i = xc; i < xc + mc + nExtraxc; ++i) {
125           const PetscInt ii = i * factorx, jj = j * factory + factory / 2, kk = k * factorz;
126 
127           if (factory % 2 == 0) arrc[k][j][i][ibackleftc + d] = 0.5 * (arrf[kk][jj - 1][ii][ibackleftf + d] + arrf[kk][jj][ii][ibackleftf + d]);
128           else arrc[k][j][i][ibackleftc + d] = arrf[kk][jj][ii][ibackleftf + d];
129         }
130 
131   for (d = 0; d < dof[1]; ++d)
132     for (k = zc; k < zc + pc; ++k)
133       for (j = yc; j < yc + nc + nExtrayc; ++j)
134         for (i = xc; i < xc + mc + nExtraxc; ++i) {
135           const PetscInt ii = i * factorx, jj = j * factory, kk = k * factorz + factorz / 2;
136 
137           if (factorz % 2 == 0) arrc[k][j][i][idownleftc + d] = 0.5 * (arrf[kk - 1][jj][ii][idownleftf + d] + arrf[kk][jj][ii][idownleftf + d]);
138           else arrc[k][j][i][idownleftc + d] = arrf[kk][jj][ii][idownleftf + d];
139         }
140 
141   for (d = 0; d < dof[2]; ++d)
142     for (k = zc; k < zc + pc + nExtrazc; ++k)
143       for (j = yc; j < yc + nc; ++j)
144         for (i = xc; i < xc + mc; ++i) {
145           const PetscInt ii = i * factorx + factorx / 2, jj = j * factory + factory / 2, kk = k * factorz;
146 
147           if (factorx % 2 == 0 && factory % 2 == 0) arrc[k][j][i][ibackc + d] = 0.25 * (arrf[kk][jj - 1][ii - 1][ibackf + d] + arrf[kk][jj - 1][ii][ibackf + d] + arrf[kk][jj][ii - 1][ibackf + d] + arrf[kk][jj][ii][ibackf + d]);
148           else if (factorx % 2 == 0) arrc[k][j][i][ibackc + d] = 0.5 * (arrf[kk][jj][ii - 1][ibackf + d] + arrf[kk][jj][ii][ibackf + d]);
149           else if (factory % 2 == 0) arrc[k][j][i][ibackc + d] = 0.5 * (arrf[kk][jj - 1][ii][ibackf + d] + arrf[kk][jj][ii][ibackf + d]);
150           else arrc[k][j][i][ibackc + d] = arrf[kk][jj][ii][ibackf + d];
151         }
152 
153   for (d = 0; d < dof[2]; ++d)
154     for (k = zc; k < zc + pc; ++k)
155       for (j = yc; j < yc + nc + nExtrayc; ++j)
156         for (i = xc; i < xc + mc; ++i) {
157           const PetscInt ii = i * factorx + factorx / 2, jj = j * factory, kk = k * factorz + factorz / 2;
158 
159           if (factorx % 2 == 0 && factorz % 2 == 0) arrc[k][j][i][idownc + d] = 0.25 * (arrf[kk - 1][jj][ii - 1][idownf + d] + arrf[kk - 1][jj][ii][idownf + d] + arrf[kk][jj][ii - 1][idownf + d] + arrf[kk][jj][ii][idownf + d]);
160           else if (factorx % 2 == 0) arrc[k][j][i][idownc + d] = 0.5 * (arrf[kk][jj][ii - 1][idownf + d] + arrf[kk][jj][ii][idownf + d]);
161           else if (factorz % 2 == 0) arrc[k][j][i][idownc + d] = 0.5 * (arrf[kk - 1][jj][ii][idownf + d] + arrf[kk][jj][ii][idownf + d]);
162           else arrc[k][j][i][idownc + d] = arrf[kk][jj][ii][idownf + d];
163         }
164 
165   for (d = 0; d < dof[2]; ++d)
166     for (k = zc; k < zc + pc; ++k)
167       for (j = yc; j < yc + nc; ++j)
168         for (i = xc; i < xc + mc + nExtraxc; ++i) {
169           const PetscInt ii = i * factorx, jj = j * factory + factory / 2, kk = k * factorz + factorz / 2;
170 
171           if (factory % 2 == 0 && factorz % 2 == 0) arrc[k][j][i][ileftc + d] = 0.25 * (arrf[kk - 1][jj - 1][ii][ileftf + d] + arrf[kk - 1][jj][ii][ileftf + d] + arrf[kk][jj - 1][ii][ileftf + d] + arrf[kk][jj][ii][ileftf + d]);
172           else if (factory % 2 == 0) arrc[k][j][i][ileftc + d] = 0.5 * (arrf[kk][jj - 1][ii][ileftf + d] + arrf[kk][jj][ii][ileftf + d]);
173           else if (factorz % 2 == 0) arrc[k][j][i][ileftc + d] = 0.5 * (arrf[kk - 1][jj][ii][ileftf + d] + arrf[kk][jj][ii][ileftf + d]);
174           else arrc[k][j][i][ileftc + d] = arrf[kk][jj][ii][ileftf + d];
175         }
176 
177   for (d = 0; d < dof[3]; ++d)
178     for (k = zc; k < zc + pc; ++k)
179       for (j = yc; j < yc + nc; ++j)
180         for (i = xc; i < xc + mc; ++i) {
181           const PetscInt ii = i * factorx + factorx / 2, jj = j * factory + factory / 2, kk = k * factorz + factorz / 2;
182 
183           if (factorx % 2 == 0 && factory % 2 == 0 && factorz % 2 == 0)
184             arrc[k][j][i][ielemc + d] = 0.125 * (arrf[kk - 1][jj - 1][ii - 1][ielemf + d] + arrf[kk - 1][jj - 1][ii][ielemf + d] + arrf[kk - 1][jj][ii - 1][ielemf + d] + arrf[kk - 1][jj][ii][ielemf + d] + arrf[kk][jj - 1][ii - 1][ielemf + d] + arrf[kk][jj - 1][ii][ielemf + d] + arrf[kk][jj][ii - 1][ielemf + d] + arrf[kk][jj][ii][ielemf + d]);
185           else if (factorx % 2 == 0 && factory % 2 == 0) arrc[k][j][i][ielemc + d] = 0.25 * (arrf[kk][jj - 1][ii - 1][ielemf + d] + arrf[kk][jj - 1][ii][ielemf + d] + arrf[kk][jj][ii - 1][ielemf + d] + arrf[kk][jj][ii][ielemf + d]);
186           else if (factorx % 2 == 0 && factorz % 2 == 0) arrc[k][j][i][ielemc + d] = 0.25 * (arrf[kk - 1][jj][ii - 1][ielemf + d] + arrf[kk - 1][jj][ii][ielemf + d] + arrf[kk][jj][ii - 1][ielemf + d] + arrf[kk][jj][ii][ielemf + d]);
187           else if (factory % 2 == 0 && factorz % 2 == 0) arrc[k][j][i][ielemc + d] = 0.25 * (arrf[kk - 1][jj - 1][ii][ielemf + d] + arrf[kk - 1][jj][ii][ielemf + d] + arrf[kk][jj - 1][ii][ielemf + d] + arrf[kk][jj][ii][ielemf + d]);
188           else if (factorx % 2 == 0) arrc[k][j][i][ielemc + d] = 0.5 * (arrf[kk][jj][ii - 1][ielemf + d] + arrf[kk][jj][ii][ielemf + d]);
189           else if (factory % 2 == 0) arrc[k][j][i][ielemc + d] = 0.5 * (arrf[kk][jj - 1][ii][ielemf + d] + arrf[kk][jj][ii][ielemf + d]);
190           else if (factorz % 2 == 0) arrc[k][j][i][ielemc + d] = 0.5 * (arrf[kk - 1][jj][ii][ielemf + d] + arrf[kk][jj][ii][ielemf + d]);
191           else arrc[k][j][i][ielemc + d] = arrf[kk][jj][ii][ielemf + d];
192         }
193 
194   PetscCall(DMStagVecRestoreArray(dmf, xf_local, &arrf));
195   PetscCall(DMStagVecRestoreArray(dmc, xc_local, &arrc));
196   PetscFunctionReturn(PETSC_SUCCESS);
197 }
198 
DMStagSetUniformCoordinatesExplicit_3d(DM dm,PetscReal xmin,PetscReal xmax,PetscReal ymin,PetscReal ymax,PetscReal zmin,PetscReal zmax)199 PETSC_INTERN PetscErrorCode DMStagSetUniformCoordinatesExplicit_3d(DM dm, PetscReal xmin, PetscReal xmax, PetscReal ymin, PetscReal ymax, PetscReal zmin, PetscReal zmax)
200 {
201   DM_Stag        *stagCoord;
202   DM              dmCoord;
203   Vec             coordLocal;
204   PetscReal       h[3], min[3];
205   PetscScalar ****arr;
206   PetscInt        ind[3], start_ghost[3], n_ghost[3], s, c;
207   PetscInt        ibackdownleft, ibackdown, ibackleft, iback, idownleft, idown, ileft, ielement;
208 
209   PetscFunctionBegin;
210   PetscCall(DMGetCoordinateDM(dm, &dmCoord));
211   stagCoord = (DM_Stag *)dmCoord->data;
212   for (s = 0; s < 4; ++s) {
213     PetscCheck(stagCoord->dof[s] == 0 || stagCoord->dof[s] == 3, PetscObjectComm((PetscObject)dm), PETSC_ERR_PLIB, "Coordinate DM in 3 dimensions must have 0 or 3 dof on each stratum, but stratum %" PetscInt_FMT " has %" PetscInt_FMT " dof", s,
214                stagCoord->dof[s]);
215   }
216   PetscCall(DMCreateLocalVector(dmCoord, &coordLocal));
217   PetscCall(DMStagVecGetArray(dmCoord, coordLocal, &arr));
218   if (stagCoord->dof[0]) PetscCall(DMStagGetLocationSlot(dmCoord, DMSTAG_BACK_DOWN_LEFT, 0, &ibackdownleft));
219   if (stagCoord->dof[1]) {
220     PetscCall(DMStagGetLocationSlot(dmCoord, DMSTAG_BACK_DOWN, 0, &ibackdown));
221     PetscCall(DMStagGetLocationSlot(dmCoord, DMSTAG_BACK_LEFT, 0, &ibackleft));
222     PetscCall(DMStagGetLocationSlot(dmCoord, DMSTAG_DOWN_LEFT, 0, &idownleft));
223   }
224   if (stagCoord->dof[2]) {
225     PetscCall(DMStagGetLocationSlot(dmCoord, DMSTAG_BACK, 0, &iback));
226     PetscCall(DMStagGetLocationSlot(dmCoord, DMSTAG_DOWN, 0, &idown));
227     PetscCall(DMStagGetLocationSlot(dmCoord, DMSTAG_LEFT, 0, &ileft));
228   }
229   if (stagCoord->dof[3]) PetscCall(DMStagGetLocationSlot(dmCoord, DMSTAG_ELEMENT, 0, &ielement));
230   PetscCall(DMStagGetGhostCorners(dmCoord, &start_ghost[0], &start_ghost[1], &start_ghost[2], &n_ghost[0], &n_ghost[1], &n_ghost[2]));
231   min[0] = xmin;
232   min[1] = ymin;
233   min[2] = zmin;
234   h[0]   = (xmax - xmin) / stagCoord->N[0];
235   h[1]   = (ymax - ymin) / stagCoord->N[1];
236   h[2]   = (zmax - zmin) / stagCoord->N[2];
237 
238   for (ind[2] = start_ghost[2]; ind[2] < start_ghost[2] + n_ghost[2]; ++ind[2]) {
239     for (ind[1] = start_ghost[1]; ind[1] < start_ghost[1] + n_ghost[1]; ++ind[1]) {
240       for (ind[0] = start_ghost[0]; ind[0] < start_ghost[0] + n_ghost[0]; ++ind[0]) {
241         if (stagCoord->dof[0]) {
242           const PetscReal offs[3] = {0.0, 0.0, 0.0};
243           for (c = 0; c < 3; ++c) arr[ind[2]][ind[1]][ind[0]][ibackdownleft + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
244         }
245         if (stagCoord->dof[1]) {
246           const PetscReal offs[3] = {0.5, 0.0, 0.0};
247           for (c = 0; c < 3; ++c) arr[ind[2]][ind[1]][ind[0]][ibackdown + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
248         }
249         if (stagCoord->dof[1]) {
250           const PetscReal offs[3] = {0.0, 0.5, 0.0};
251           for (c = 0; c < 3; ++c) arr[ind[2]][ind[1]][ind[0]][ibackleft + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
252         }
253         if (stagCoord->dof[2]) {
254           const PetscReal offs[3] = {0.5, 0.5, 0.0};
255           for (c = 0; c < 3; ++c) arr[ind[2]][ind[1]][ind[0]][iback + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
256         }
257         if (stagCoord->dof[1]) {
258           const PetscReal offs[3] = {0.0, 0.0, 0.5};
259           for (c = 0; c < 3; ++c) arr[ind[2]][ind[1]][ind[0]][idownleft + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
260         }
261         if (stagCoord->dof[2]) {
262           const PetscReal offs[3] = {0.5, 0.0, 0.5};
263           for (c = 0; c < 3; ++c) arr[ind[2]][ind[1]][ind[0]][idown + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
264         }
265         if (stagCoord->dof[2]) {
266           const PetscReal offs[3] = {0.0, 0.5, 0.5};
267           for (c = 0; c < 3; ++c) arr[ind[2]][ind[1]][ind[0]][ileft + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
268         }
269         if (stagCoord->dof[3]) {
270           const PetscReal offs[3] = {0.5, 0.5, 0.5};
271           for (c = 0; c < 3; ++c) arr[ind[2]][ind[1]][ind[0]][ielement + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
272         }
273       }
274     }
275   }
276   PetscCall(DMStagVecRestoreArray(dmCoord, coordLocal, &arr));
277   PetscCall(DMSetCoordinatesLocal(dm, coordLocal));
278   PetscCall(VecDestroy(&coordLocal));
279   PetscFunctionReturn(PETSC_SUCCESS);
280 }
281 
282 /* Helper functions used in DMSetUp_Stag() */
283 static PetscErrorCode DMStagSetUpBuildRankGrid_3d(DM);
284 static PetscErrorCode DMStagSetUpBuildNeighbors_3d(DM);
285 static PetscErrorCode DMStagSetUpBuildGlobalOffsets_3d(DM, PetscInt **);
286 static PetscErrorCode DMStagSetUpBuildScatter_3d(DM, const PetscInt *);
287 static PetscErrorCode DMStagSetUpBuildL2G_3d(DM, const PetscInt *);
288 static PetscErrorCode DMStagComputeLocationOffsets_3d(DM);
289 
DMSetUp_Stag_3d(DM dm)290 PETSC_INTERN PetscErrorCode DMSetUp_Stag_3d(DM dm)
291 {
292   DM_Stag *const stag = (DM_Stag *)dm->data;
293   PetscMPIInt    rank;
294   PetscInt       i, j, d;
295   PetscInt      *globalOffsets;
296   const PetscInt dim = 3;
297 
298   PetscFunctionBegin;
299   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
300 
301   /* Rank grid sizes (populates stag->nRanks) */
302   PetscCall(DMStagSetUpBuildRankGrid_3d(dm));
303 
304   /* Determine location of rank in grid */
305   stag->rank[0] = rank % stag->nRanks[0];
306   stag->rank[1] = rank % (stag->nRanks[0] * stag->nRanks[1]) / stag->nRanks[0];
307   stag->rank[2] = rank / (stag->nRanks[0] * stag->nRanks[1]);
308   for (d = 0; d < dim; ++d) {
309     stag->firstRank[d] = PetscNot(stag->rank[d]);
310     stag->lastRank[d]  = (PetscBool)(stag->rank[d] == stag->nRanks[d] - 1);
311   }
312 
313   /* Determine locally owned region (if not already prescribed).
314    Divide equally, giving lower ranks in each dimension and extra element if needbe.
315    Note that this uses O(P) storage. If this ever becomes an issue, this could
316    be refactored to not keep this data around.  */
317   for (i = 0; i < dim; ++i) {
318     if (!stag->l[i]) {
319       const PetscInt Ni = stag->N[i], nRanksi = stag->nRanks[i];
320       PetscCall(PetscMalloc1(stag->nRanks[i], &stag->l[i]));
321       for (j = 0; j < stag->nRanks[i]; ++j) stag->l[i][j] = Ni / nRanksi + ((Ni % nRanksi) > j);
322     }
323   }
324 
325   /* Retrieve local size in stag->n */
326   for (i = 0; i < dim; ++i) stag->n[i] = stag->l[i][stag->rank[i]];
327   if (PetscDefined(USE_DEBUG)) {
328     for (i = 0; i < dim; ++i) {
329       PetscInt Ncheck, j;
330       Ncheck = 0;
331       for (j = 0; j < stag->nRanks[i]; ++j) Ncheck += stag->l[i][j];
332       PetscCheck(Ncheck == stag->N[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local sizes in dimension %" PetscInt_FMT " don't add up. %" PetscInt_FMT " != %" PetscInt_FMT, i, Ncheck, stag->N[i]);
333     }
334   }
335 
336   /* Compute starting elements */
337   for (i = 0; i < dim; ++i) {
338     stag->start[i] = 0;
339     for (j = 0; j < stag->rank[i]; ++j) stag->start[i] += stag->l[i][j];
340   }
341 
342   /* Determine ranks of neighbors */
343   PetscCall(DMStagSetUpBuildNeighbors_3d(dm));
344 
345   /* Define useful sizes */
346   {
347     PetscInt  entriesPerEdge, entriesPerFace, entriesPerCorner, entriesPerElementRow, entriesPerElementLayer;
348     PetscBool dummyEnd[3];
349     for (d = 0; d < 3; ++d) dummyEnd[d] = (PetscBool)(stag->lastRank[d] && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);
350     stag->entriesPerElement = stag->dof[0] + 3 * stag->dof[1] + 3 * stag->dof[2] + stag->dof[3];
351     entriesPerFace          = stag->dof[0] + 2 * stag->dof[1] + stag->dof[2];
352     entriesPerEdge          = stag->dof[0] + stag->dof[1];
353     entriesPerCorner        = stag->dof[0];
354     entriesPerElementRow    = stag->n[0] * stag->entriesPerElement + (dummyEnd[0] ? entriesPerFace : 0);
355     entriesPerElementLayer  = stag->n[1] * entriesPerElementRow + (dummyEnd[1] ? stag->n[0] * entriesPerFace : 0) + (dummyEnd[1] && dummyEnd[0] ? entriesPerEdge : 0);
356     stag->entries = stag->n[2] * entriesPerElementLayer + (dummyEnd[2] ? stag->n[0] * stag->n[1] * entriesPerFace : 0) + (dummyEnd[2] && dummyEnd[0] ? stag->n[1] * entriesPerEdge : 0) + (dummyEnd[2] && dummyEnd[1] ? stag->n[0] * entriesPerEdge : 0) + (dummyEnd[2] && dummyEnd[1] && dummyEnd[0] ? entriesPerCorner : 0);
357   }
358 
359   /* Check that we will not overflow 32-bit indices (slightly overconservative) */
360 #if !defined(PETSC_USE_64BIT_INDICES)
361   PetscCheck(((PetscInt64)stag->n[0]) * ((PetscInt64)stag->n[1]) * ((PetscInt64)stag->n[2]) * ((PetscInt64)stag->entriesPerElement) <= (PetscInt64)PETSC_MPI_INT_MAX, PetscObjectComm((PetscObject)dm), PETSC_ERR_INT_OVERFLOW, "Mesh of %" PetscInt_FMT " x %" PetscInt_FMT " x %" PetscInt_FMT " with %" PetscInt_FMT " entries per (interior) element is likely too large for 32-bit indices",
362              stag->n[0], stag->n[1], stag->n[2], stag->entriesPerElement);
363 #endif
364 
365   /* Compute offsets for each rank into global vectors
366 
367     This again requires O(P) storage, which could be replaced with some global
368     communication.
369   */
370   PetscCall(DMStagSetUpBuildGlobalOffsets_3d(dm, &globalOffsets));
371 
372   for (d = 0; d < dim; ++d)
373     PetscCheck(stag->boundaryType[d] == DM_BOUNDARY_NONE || stag->boundaryType[d] == DM_BOUNDARY_PERIODIC || stag->boundaryType[d] == DM_BOUNDARY_GHOSTED, PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Unsupported boundary type");
374 
375   /* Define ghosted/local sizes */
376   for (d = 0; d < dim; ++d) {
377     switch (stag->boundaryType[d]) {
378     case DM_BOUNDARY_NONE:
379       /* Note: for a elements-only DMStag, the extra elements on the edges aren't necessary but we include them anyway */
380       switch (stag->stencilType) {
381       case DMSTAG_STENCIL_NONE: /* only the extra one on the right/top edges */
382         stag->nGhost[d]     = stag->n[d];
383         stag->startGhost[d] = stag->start[d];
384         if (stag->lastRank[d]) stag->nGhost[d] += 1;
385         break;
386       case DMSTAG_STENCIL_STAR: /* allocate the corners but don't use them */
387       case DMSTAG_STENCIL_BOX:
388         stag->nGhost[d]     = stag->n[d];
389         stag->startGhost[d] = stag->start[d];
390         if (!stag->firstRank[d]) {
391           stag->nGhost[d] += stag->stencilWidth; /* add interior ghost elements */
392           stag->startGhost[d] -= stag->stencilWidth;
393         }
394         if (!stag->lastRank[d]) {
395           stag->nGhost[d] += stag->stencilWidth; /* add interior ghost elements */
396         } else {
397           stag->nGhost[d] += 1; /* one element on the boundary to complete blocking */
398         }
399         break;
400       default:
401         SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Unrecognized ghost stencil type %d", stag->stencilType);
402       }
403       break;
404     case DM_BOUNDARY_GHOSTED:
405       switch (stag->stencilType) {
406       case DMSTAG_STENCIL_NONE:
407         stag->startGhost[d] = stag->start[d];
408         stag->nGhost[d]     = stag->n[d] + (stag->lastRank[d] ? 1 : 0);
409         break;
410       case DMSTAG_STENCIL_STAR:
411       case DMSTAG_STENCIL_BOX:
412         stag->startGhost[d] = stag->start[d] - stag->stencilWidth; /* This value may be negative */
413         stag->nGhost[d]     = stag->n[d] + 2 * stag->stencilWidth + (stag->lastRank[d] && stag->stencilWidth == 0 ? 1 : 0);
414         break;
415       default:
416         SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Unrecognized ghost stencil type %d", stag->stencilType);
417       }
418       break;
419     case DM_BOUNDARY_PERIODIC:
420       switch (stag->stencilType) {
421       case DMSTAG_STENCIL_NONE: /* only the extra one on the right/top edges */
422         stag->nGhost[d]     = stag->n[d];
423         stag->startGhost[d] = stag->start[d];
424         break;
425       case DMSTAG_STENCIL_STAR:
426       case DMSTAG_STENCIL_BOX:
427         stag->nGhost[d]     = stag->n[d] + 2 * stag->stencilWidth;
428         stag->startGhost[d] = stag->start[d] - stag->stencilWidth;
429         break;
430       default:
431         SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Unrecognized ghost stencil type %d", stag->stencilType);
432       }
433       break;
434     default:
435       SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Unsupported boundary type in dimension %" PetscInt_FMT, d);
436     }
437   }
438   stag->entriesGhost = stag->nGhost[0] * stag->nGhost[1] * stag->nGhost[2] * stag->entriesPerElement;
439 
440   /* Create global-->local VecScatter and local->global ISLocalToGlobalMapping */
441   PetscCall(DMStagSetUpBuildScatter_3d(dm, globalOffsets));
442   PetscCall(DMStagSetUpBuildL2G_3d(dm, globalOffsets));
443 
444   /* In special cases, create a dedicated injective local-to-global map */
445   if ((stag->boundaryType[0] == DM_BOUNDARY_PERIODIC && stag->nRanks[0] == 1) || (stag->boundaryType[1] == DM_BOUNDARY_PERIODIC && stag->nRanks[1] == 1) || (stag->boundaryType[2] == DM_BOUNDARY_PERIODIC && stag->nRanks[2] == 1)) {
446     PetscCall(DMStagPopulateLocalToGlobalInjective(dm));
447   }
448 
449   /* Free global offsets */
450   PetscCall(PetscFree(globalOffsets));
451 
452   /* Precompute location offsets */
453   PetscCall(DMStagComputeLocationOffsets_3d(dm));
454 
455   /* View from Options */
456   PetscCall(DMViewFromOptions(dm, NULL, "-dm_view"));
457   PetscFunctionReturn(PETSC_SUCCESS);
458 }
459 
460 /* adapted from da3.c */
DMStagSetUpBuildRankGrid_3d(DM dm)461 static PetscErrorCode DMStagSetUpBuildRankGrid_3d(DM dm)
462 {
463   PetscMPIInt    rank, size;
464   PetscMPIInt    m, n, p, pm;
465   DM_Stag *const stag = (DM_Stag *)dm->data;
466   const PetscInt M    = stag->N[0];
467   const PetscInt N    = stag->N[1];
468   const PetscInt P    = stag->N[2];
469 
470   PetscFunctionBegin;
471   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size));
472   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
473 
474   m = stag->nRanks[0];
475   n = stag->nRanks[1];
476   p = stag->nRanks[2];
477 
478   if (m != PETSC_DECIDE) {
479     PetscCheck(m >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Non-positive number of processors in X direction: %d", m);
480     PetscCheck(m <= size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Too many processors in X direction: %d %d", m, size);
481   }
482   if (n != PETSC_DECIDE) {
483     PetscCheck(n >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Non-positive number of processors in Y direction: %d", n);
484     PetscCheck(n <= size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Too many processors in Y direction: %d %d", n, size);
485   }
486   if (p != PETSC_DECIDE) {
487     PetscCheck(p >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Non-positive number of processors in Z direction: %d", p);
488     PetscCheck(p <= size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Too many processors in Z direction: %d %d", p, size);
489   }
490   PetscCheck(m <= 0 || n <= 0 || p <= 0 || m * n * p == size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "m %d * n %d * p %d != size %d", m, n, p, size);
491 
492   /* Partition the array among the processors */
493   if (m == PETSC_DECIDE && n != PETSC_DECIDE && p != PETSC_DECIDE) {
494     m = size / (n * p);
495   } else if (m != PETSC_DECIDE && n == PETSC_DECIDE && p != PETSC_DECIDE) {
496     n = size / (m * p);
497   } else if (m != PETSC_DECIDE && n != PETSC_DECIDE && p == PETSC_DECIDE) {
498     p = size / (m * n);
499   } else if (m == PETSC_DECIDE && n == PETSC_DECIDE && p != PETSC_DECIDE) {
500     /* try for squarish distribution */
501     m = (PetscMPIInt)(0.5 + PetscSqrtReal(((PetscReal)M) * ((PetscReal)size) / ((PetscReal)N * p)));
502     if (!m) m = 1;
503     while (m > 0) {
504       n = size / (m * p);
505       if (m * n * p == size) break;
506       m--;
507     }
508     PetscCheck(m, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "bad p value: p = %d", p);
509     if (M > N && m < n) {
510       PetscMPIInt _m = m;
511       m              = n;
512       n              = _m;
513     }
514   } else if (m == PETSC_DECIDE && n != PETSC_DECIDE && p == PETSC_DECIDE) {
515     /* try for squarish distribution */
516     m = (PetscMPIInt)(0.5 + PetscSqrtReal(((PetscReal)M) * ((PetscReal)size) / ((PetscReal)P * n)));
517     if (!m) m = 1;
518     while (m > 0) {
519       p = size / (m * n);
520       if (m * n * p == size) break;
521       m--;
522     }
523     PetscCheck(m, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "bad n value: n = %d", n);
524     if (M > P && m < p) {
525       PetscMPIInt _m = m;
526       m              = p;
527       p              = _m;
528     }
529   } else if (m != PETSC_DECIDE && n == PETSC_DECIDE && p == PETSC_DECIDE) {
530     /* try for squarish distribution */
531     n = (PetscMPIInt)(0.5 + PetscSqrtReal(((PetscReal)N) * ((PetscReal)size) / ((PetscReal)P * m)));
532     if (!n) n = 1;
533     while (n > 0) {
534       p = size / (m * n);
535       if (m * n * p == size) break;
536       n--;
537     }
538     PetscCheck(n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "bad m value: m = %d", n);
539     if (N > P && n < p) {
540       PetscMPIInt _n = n;
541       n              = p;
542       p              = _n;
543     }
544   } else if (m == PETSC_DECIDE && n == PETSC_DECIDE && p == PETSC_DECIDE) {
545     /* try for squarish distribution */
546     n = (PetscMPIInt)(0.5 + PetscPowReal(((PetscReal)N * N) * ((PetscReal)size) / ((PetscReal)P * M), (PetscReal)(1. / 3.)));
547     if (!n) n = 1;
548     while (n > 0) {
549       pm = size / n;
550       if (n * pm == size) break;
551       n--;
552     }
553     if (!n) n = 1;
554     m = (PetscMPIInt)(0.5 + PetscSqrtReal(((PetscReal)M) * ((PetscReal)size) / ((PetscReal)P * n)));
555     if (!m) m = 1;
556     while (m > 0) {
557       p = size / (m * n);
558       if (m * n * p == size) break;
559       m--;
560     }
561     if (M > P && m < p) {
562       PetscMPIInt _m = m;
563       m              = p;
564       p              = _m;
565     }
566   } else PetscCheck(m * n * p == size, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Given Bad partition");
567 
568   PetscCheck(m * n * p == size, PetscObjectComm((PetscObject)dm), PETSC_ERR_PLIB, "Could not find good partition");
569   PetscCheck(M >= m, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Partition in x direction is too fine! %" PetscInt_FMT " %d", M, m);
570   PetscCheck(N >= n, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Partition in y direction is too fine! %" PetscInt_FMT " %d", N, n);
571   PetscCheck(P >= p, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Partition in z direction is too fine! %" PetscInt_FMT " %d", P, p);
572 
573   stag->nRanks[0] = m;
574   stag->nRanks[1] = n;
575   stag->nRanks[2] = p;
576   PetscFunctionReturn(PETSC_SUCCESS);
577 }
578 
579 /* Determine ranks of neighbors, using DMDA's convention
580 
581         n24 n25 n26
582         n21 n22 n23
583         n18 n19 n20 (Front, bigger z)
584 
585         n15 n16 n17
586         n12     n14   ^ y
587         n9  n10 n11   |
588                       +--> x
589         n6  n7  n8
590         n3  n4  n5
591         n0  n1  n2 (Back, smaller z) */
DMStagSetUpBuildNeighbors_3d(DM dm)592 static PetscErrorCode DMStagSetUpBuildNeighbors_3d(DM dm)
593 {
594   DM_Stag *const stag = (DM_Stag *)dm->data;
595   PetscInt       d, i;
596   PetscBool      per[3], first[3], last[3];
597   PetscMPIInt    neighborRank[27][3], r[3], n[3];
598   const PetscInt dim = 3;
599 
600   PetscFunctionBegin;
601   for (d = 0; d < dim; ++d)
602     PetscCheck(stag->boundaryType[d] == DM_BOUNDARY_NONE || stag->boundaryType[d] == DM_BOUNDARY_PERIODIC || stag->boundaryType[d] == DM_BOUNDARY_GHOSTED, PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Neighbor determination not implemented for %s",
603                DMBoundaryTypes[stag->boundaryType[d]]);
604 
605   /* Assemble some convenience variables */
606   for (d = 0; d < dim; ++d) {
607     per[d]   = (PetscBool)(stag->boundaryType[d] == DM_BOUNDARY_PERIODIC);
608     first[d] = stag->firstRank[d];
609     last[d]  = stag->lastRank[d];
610     r[d]     = stag->rank[d];
611     n[d]     = stag->nRanks[d];
612   }
613 
614   /* First, compute the position in the rank grid for all neighbors */
615 
616   neighborRank[0][0] = first[0] ? (per[0] ? n[0] - 1 : -1) : r[0] - 1; /* left  down back  */
617   neighborRank[0][1] = first[1] ? (per[1] ? n[1] - 1 : -1) : r[1] - 1;
618   neighborRank[0][2] = first[2] ? (per[2] ? n[2] - 1 : -1) : r[2] - 1;
619 
620   neighborRank[1][0] = r[0]; /*       down back  */
621   neighborRank[1][1] = first[1] ? (per[1] ? n[1] - 1 : -1) : r[1] - 1;
622   neighborRank[1][2] = first[2] ? (per[2] ? n[2] - 1 : -1) : r[2] - 1;
623 
624   neighborRank[2][0] = last[0] ? (per[0] ? 0 : -1) : r[0] + 1; /* right down back  */
625   neighborRank[2][1] = first[1] ? (per[1] ? n[1] - 1 : -1) : r[1] - 1;
626   neighborRank[2][2] = first[2] ? (per[2] ? n[2] - 1 : -1) : r[2] - 1;
627 
628   neighborRank[3][0] = first[0] ? (per[0] ? n[0] - 1 : -1) : r[0] - 1; /* left       back  */
629   neighborRank[3][1] = r[1];
630   neighborRank[3][2] = first[2] ? (per[2] ? n[2] - 1 : -1) : r[2] - 1;
631 
632   neighborRank[4][0] = r[0]; /*            back  */
633   neighborRank[4][1] = r[1];
634   neighborRank[4][2] = first[2] ? (per[2] ? n[2] - 1 : -1) : r[2] - 1;
635 
636   neighborRank[5][0] = last[0] ? (per[0] ? 0 : -1) : r[0] + 1; /* right      back  */
637   neighborRank[5][1] = r[1];
638   neighborRank[5][2] = first[2] ? (per[2] ? n[2] - 1 : -1) : r[2] - 1;
639 
640   neighborRank[6][0] = first[0] ? (per[0] ? n[0] - 1 : -1) : r[0] - 1; /* left  up   back  */
641   neighborRank[6][1] = last[1] ? (per[1] ? 0 : -1) : r[1] + 1;
642   neighborRank[6][2] = first[2] ? (per[2] ? n[2] - 1 : -1) : r[2] - 1;
643 
644   neighborRank[7][0] = r[0]; /*       up   back  */
645   neighborRank[7][1] = last[1] ? (per[1] ? 0 : -1) : r[1] + 1;
646   neighborRank[7][2] = first[2] ? (per[2] ? n[2] - 1 : -1) : r[2] - 1;
647 
648   neighborRank[8][0] = last[0] ? (per[0] ? 0 : -1) : r[0] + 1; /* right up   back  */
649   neighborRank[8][1] = last[1] ? (per[1] ? 0 : -1) : r[1] + 1;
650   neighborRank[8][2] = first[2] ? (per[2] ? n[2] - 1 : -1) : r[2] - 1;
651 
652   neighborRank[9][0] = first[0] ? (per[0] ? n[0] - 1 : -1) : r[0] - 1; /* left  down       */
653   neighborRank[9][1] = first[1] ? (per[1] ? n[1] - 1 : -1) : r[1] - 1;
654   neighborRank[9][2] = r[2];
655 
656   neighborRank[10][0] = r[0]; /*       down       */
657   neighborRank[10][1] = first[1] ? (per[1] ? n[1] - 1 : -1) : r[1] - 1;
658   neighborRank[10][2] = r[2];
659 
660   neighborRank[11][0] = last[0] ? (per[0] ? 0 : -1) : r[0] + 1; /* right down       */
661   neighborRank[11][1] = first[1] ? (per[1] ? n[1] - 1 : -1) : r[1] - 1;
662   neighborRank[11][2] = r[2];
663 
664   neighborRank[12][0] = first[0] ? (per[0] ? n[0] - 1 : -1) : r[0] - 1; /* left             */
665   neighborRank[12][1] = r[1];
666   neighborRank[12][2] = r[2];
667 
668   neighborRank[13][0] = r[0];
669   neighborRank[13][1] = r[1];
670   neighborRank[13][2] = r[2];
671 
672   neighborRank[14][0] = last[0] ? (per[0] ? 0 : -1) : r[0] + 1; /* right            */
673   neighborRank[14][1] = r[1];
674   neighborRank[14][2] = r[2];
675 
676   neighborRank[15][0] = first[0] ? (per[0] ? n[0] - 1 : -1) : r[0] - 1; /* left  up         */
677   neighborRank[15][1] = last[1] ? (per[1] ? 0 : -1) : r[1] + 1;
678   neighborRank[15][2] = r[2];
679 
680   neighborRank[16][0] = r[0]; /*       up         */
681   neighborRank[16][1] = last[1] ? (per[1] ? 0 : -1) : r[1] + 1;
682   neighborRank[16][2] = r[2];
683 
684   neighborRank[17][0] = last[0] ? (per[0] ? 0 : -1) : r[0] + 1; /* right up         */
685   neighborRank[17][1] = last[1] ? (per[1] ? 0 : -1) : r[1] + 1;
686   neighborRank[17][2] = r[2];
687 
688   neighborRank[18][0] = first[0] ? (per[0] ? n[0] - 1 : -1) : r[0] - 1; /* left  down front */
689   neighborRank[18][1] = first[1] ? (per[1] ? n[1] - 1 : -1) : r[1] - 1;
690   neighborRank[18][2] = last[2] ? (per[2] ? 0 : -1) : r[2] + 1;
691 
692   neighborRank[19][0] = r[0]; /*       down front */
693   neighborRank[19][1] = first[1] ? (per[1] ? n[1] - 1 : -1) : r[1] - 1;
694   neighborRank[19][2] = last[2] ? (per[2] ? 0 : -1) : r[2] + 1;
695 
696   neighborRank[20][0] = last[0] ? (per[0] ? 0 : -1) : r[0] + 1; /* right down front */
697   neighborRank[20][1] = first[1] ? (per[1] ? n[1] - 1 : -1) : r[1] - 1;
698   neighborRank[20][2] = last[2] ? (per[2] ? 0 : -1) : r[2] + 1;
699 
700   neighborRank[21][0] = first[0] ? (per[0] ? n[0] - 1 : -1) : r[0] - 1; /* left       front */
701   neighborRank[21][1] = r[1];
702   neighborRank[21][2] = last[2] ? (per[2] ? 0 : -1) : r[2] + 1;
703 
704   neighborRank[22][0] = r[0]; /*            front */
705   neighborRank[22][1] = r[1];
706   neighborRank[22][2] = last[2] ? (per[2] ? 0 : -1) : r[2] + 1;
707 
708   neighborRank[23][0] = last[0] ? (per[0] ? 0 : -1) : r[0] + 1; /* right      front */
709   neighborRank[23][1] = r[1];
710   neighborRank[23][2] = last[2] ? (per[2] ? 0 : -1) : r[2] + 1;
711 
712   neighborRank[24][0] = first[0] ? (per[0] ? n[0] - 1 : -1) : r[0] - 1; /* left  up   front */
713   neighborRank[24][1] = last[1] ? (per[1] ? 0 : -1) : r[1] + 1;
714   neighborRank[24][2] = last[2] ? (per[2] ? 0 : -1) : r[2] + 1;
715 
716   neighborRank[25][0] = r[0]; /*       up   front */
717   neighborRank[25][1] = last[1] ? (per[1] ? 0 : -1) : r[1] + 1;
718   neighborRank[25][2] = last[2] ? (per[2] ? 0 : -1) : r[2] + 1;
719 
720   neighborRank[26][0] = last[0] ? (per[0] ? 0 : -1) : r[0] + 1; /* right up   front */
721   neighborRank[26][1] = last[1] ? (per[1] ? 0 : -1) : r[1] + 1;
722   neighborRank[26][2] = last[2] ? (per[2] ? 0 : -1) : r[2] + 1;
723 
724   /* Then, compute the rank of each in the linear ordering */
725   PetscCall(PetscMalloc1(27, &stag->neighbors));
726   for (i = 0; i < 27; ++i) {
727     if (neighborRank[i][0] >= 0 && neighborRank[i][1] >= 0 && neighborRank[i][2] >= 0) {
728       stag->neighbors[i] = neighborRank[i][0] + n[0] * neighborRank[i][1] + n[0] * n[1] * neighborRank[i][2];
729     } else {
730       stag->neighbors[i] = -1;
731     }
732   }
733   PetscFunctionReturn(PETSC_SUCCESS);
734 }
735 
DMStagSetUpBuildGlobalOffsets_3d(DM dm,PetscInt ** pGlobalOffsets)736 static PetscErrorCode DMStagSetUpBuildGlobalOffsets_3d(DM dm, PetscInt **pGlobalOffsets)
737 {
738   const DM_Stag *const stag = (DM_Stag *)dm->data;
739   PetscInt            *globalOffsets;
740   PetscInt             i, j, k, d, entriesPerEdge, entriesPerFace, count;
741   PetscMPIInt          size;
742   PetscBool            extra[3];
743 
744   PetscFunctionBegin;
745   for (d = 0; d < 3; ++d) extra[d] = (PetscBool)(stag->boundaryType[d] != DM_BOUNDARY_PERIODIC); /* Extra points in global rep */
746   entriesPerFace = stag->dof[0] + 2 * stag->dof[1] + stag->dof[2];
747   entriesPerEdge = stag->dof[0] + stag->dof[1];
748   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size));
749   PetscCall(PetscMalloc1(size, pGlobalOffsets));
750   globalOffsets    = *pGlobalOffsets;
751   globalOffsets[0] = 0;
752   count            = 1; /* note the count is offset by 1 here. We add the size of the previous rank */
753   for (k = 0; k < stag->nRanks[2] - 1; ++k) {
754     const PetscInt nnk = stag->l[2][k];
755     for (j = 0; j < stag->nRanks[1] - 1; ++j) {
756       const PetscInt nnj = stag->l[1][j];
757       for (i = 0; i < stag->nRanks[0] - 1; ++i) {
758         const PetscInt nni = stag->l[0][i];
759         /* Interior : No right/top/front boundaries */
760         globalOffsets[count] = globalOffsets[count - 1] + nni * nnj * nnk * stag->entriesPerElement;
761         ++count;
762       }
763       {
764         /* Right boundary - extra faces */
765         /* i = stag->nRanks[0]-1; */
766         const PetscInt nni   = stag->l[0][i];
767         globalOffsets[count] = globalOffsets[count - 1] + nnj * nni * nnk * stag->entriesPerElement + (extra[0] ? nnj * nnk * entriesPerFace : 0);
768         ++count;
769       }
770     }
771     {
772       /* j = stag->nRanks[1]-1; */
773       const PetscInt nnj = stag->l[1][j];
774       for (i = 0; i < stag->nRanks[0] - 1; ++i) {
775         const PetscInt nni = stag->l[0][i];
776         /* Up boundary - extra faces */
777         globalOffsets[count] = globalOffsets[count - 1] + nnj * nni * nnk * stag->entriesPerElement + (extra[1] ? nni * nnk * entriesPerFace : 0);
778         ++count;
779       }
780       {
781         /* i = stag->nRanks[0]-1; */
782         const PetscInt nni = stag->l[0][i];
783         /* Up right boundary - 2x extra faces and extra edges */
784         globalOffsets[count] = globalOffsets[count - 1] + nnj * nni * nnk * stag->entriesPerElement + (extra[0] ? nnj * nnk * entriesPerFace : 0) + (extra[1] ? nni * nnk * entriesPerFace : 0) + (extra[0] && extra[1] ? nnk * entriesPerEdge : 0);
785         ++count;
786       }
787     }
788   }
789   {
790     /* k = stag->nRanks[2]-1; */
791     const PetscInt nnk = stag->l[2][k];
792     for (j = 0; j < stag->nRanks[1] - 1; ++j) {
793       const PetscInt nnj = stag->l[1][j];
794       for (i = 0; i < stag->nRanks[0] - 1; ++i) {
795         const PetscInt nni = stag->l[0][i];
796         /* Front boundary - extra faces */
797         globalOffsets[count] = globalOffsets[count - 1] + nnj * nni * nnk * stag->entriesPerElement + (extra[2] ? nni * nnj * entriesPerFace : 0);
798         ++count;
799       }
800       {
801         /* i = stag->nRanks[0]-1; */
802         const PetscInt nni = stag->l[0][i];
803         /* Front right boundary - 2x extra faces and extra edges */
804         globalOffsets[count] = globalOffsets[count - 1] + nnj * nni * nnk * stag->entriesPerElement + (extra[0] ? nnk * nnj * entriesPerFace : 0) + (extra[2] ? nni * nnj * entriesPerFace : 0) + (extra[0] && extra[2] ? nnj * entriesPerEdge : 0);
805         ++count;
806       }
807     }
808     {
809       /* j = stag->nRanks[1]-1; */
810       const PetscInt nnj = stag->l[1][j];
811       for (i = 0; i < stag->nRanks[0] - 1; ++i) {
812         const PetscInt nni = stag->l[0][i];
813         /* Front up boundary - 2x extra faces and extra edges */
814         globalOffsets[count] = globalOffsets[count - 1] + nnj * nni * nnk * stag->entriesPerElement + (extra[1] ? nnk * nni * entriesPerFace : 0) + (extra[2] ? nnj * nni * entriesPerFace : 0) + (extra[1] && extra[2] ? nni * entriesPerEdge : 0);
815         ++count;
816       }
817       /* Don't need to compute entries in last element */
818     }
819   }
820   PetscFunctionReturn(PETSC_SUCCESS);
821 }
822 
823 /* A helper function to reduce code duplication as we loop over various ranges.
824    i,j,k refer to element numbers on the rank where an element lives in the global
825    representation (without ghosts) to be offset by a global offset per rank.
826    ig,jg,kg refer to element numbers in the local (this rank) ghosted numbering.
827    Note that this function could easily be converted to a macro (it does not compute
828    anything except loop indices and the values of idxGlobal and idxLocal).  */
DMStagSetUpBuildScatterPopulateIdx_3d(DM_Stag * stag,PetscInt * count,PetscInt * idxLocal,PetscInt * idxGlobal,PetscInt entriesPerEdge,PetscInt entriesPerFace,PetscInt eprNeighbor,PetscInt eplNeighbor,PetscInt eprGhost,PetscInt eplGhost,PetscInt epFaceRow,PetscInt globalOffset,PetscInt startx,PetscInt starty,PetscInt startz,PetscInt startGhostx,PetscInt startGhosty,PetscInt startGhostz,PetscInt endGhostx,PetscInt endGhosty,PetscInt endGhostz,PetscBool extrax,PetscBool extray,PetscBool extraz)829 static PetscErrorCode DMStagSetUpBuildScatterPopulateIdx_3d(DM_Stag *stag, PetscInt *count, PetscInt *idxLocal, PetscInt *idxGlobal, PetscInt entriesPerEdge, PetscInt entriesPerFace, PetscInt eprNeighbor, PetscInt eplNeighbor, PetscInt eprGhost, PetscInt eplGhost, PetscInt epFaceRow, PetscInt globalOffset, PetscInt startx, PetscInt starty, PetscInt startz, PetscInt startGhostx, PetscInt startGhosty, PetscInt startGhostz, PetscInt endGhostx, PetscInt endGhosty, PetscInt endGhostz, PetscBool extrax, PetscBool extray, PetscBool extraz)
830 {
831   PetscInt ig, jg, kg, d, c;
832 
833   PetscFunctionBegin;
834   c = *count;
835   for (kg = startGhostz; kg < endGhostz; ++kg) {
836     const PetscInt k = kg - startGhostz + startz;
837     for (jg = startGhosty; jg < endGhosty; ++jg) {
838       const PetscInt j = jg - startGhosty + starty;
839       for (ig = startGhostx; ig < endGhostx; ++ig) {
840         const PetscInt i = ig - startGhostx + startx;
841         for (d = 0; d < stag->entriesPerElement; ++d, ++c) {
842           idxGlobal[c] = globalOffset + k * eplNeighbor + j * eprNeighbor + i * stag->entriesPerElement + d;
843           idxLocal[c]  = kg * eplGhost + jg * eprGhost + ig * stag->entriesPerElement + d;
844         }
845       }
846       if (extrax) {
847         PetscInt       dLocal;
848         const PetscInt i = endGhostx - startGhostx + startx;
849         ig               = endGhostx;
850         for (d = 0, dLocal = 0; d < stag->dof[0]; ++d, ++dLocal, ++c) { /* Vertex */
851           idxGlobal[c] = globalOffset + k * eplNeighbor + j * eprNeighbor + i * stag->entriesPerElement + d;
852           idxLocal[c]  = kg * eplGhost + jg * eprGhost + ig * stag->entriesPerElement + dLocal;
853         }
854         dLocal += stag->dof[1];                                                /* Skip back down edge */
855         for (; dLocal < stag->dof[0] + 2 * stag->dof[1]; ++d, ++dLocal, ++c) { /* Back left edge */
856           idxGlobal[c] = globalOffset + k * eplNeighbor + j * eprNeighbor + i * stag->entriesPerElement + d;
857           idxLocal[c]  = kg * eplGhost + jg * eprGhost + ig * stag->entriesPerElement + dLocal;
858         }
859         dLocal += stag->dof[2];                                                               /* Skip back face */
860         for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + stag->dof[2]; ++d, ++dLocal, ++c) { /* Down left edge */
861           idxGlobal[c] = globalOffset + k * eplNeighbor + j * eprNeighbor + i * stag->entriesPerElement + d;
862           idxLocal[c]  = kg * eplGhost + jg * eprGhost + ig * stag->entriesPerElement + dLocal;
863         }
864         dLocal += stag->dof[2];                                                                   /* Skip down face */
865         for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + 3 * stag->dof[2]; ++d, ++dLocal, ++c) { /* Left face */
866           idxGlobal[c] = globalOffset + k * eplNeighbor + j * eprNeighbor + i * stag->entriesPerElement + d;
867           idxLocal[c]  = kg * eplGhost + jg * eprGhost + ig * stag->entriesPerElement + dLocal;
868         }
869         /* Skip element */
870       }
871     }
872     if (extray) {
873       const PetscInt j = endGhosty - startGhosty + starty;
874       jg               = endGhosty;
875       for (ig = startGhostx; ig < endGhostx; ++ig) {
876         const PetscInt i = ig - startGhostx + startx;
877         /* Vertex and Back down edge */
878         PetscInt dLocal;
879         for (d = 0, dLocal = 0; d < stag->dof[0] + stag->dof[1]; ++d, ++dLocal, ++c) {              /* Vertex */
880           idxGlobal[c] = globalOffset + k * eplNeighbor + j * eprNeighbor + i * entriesPerFace + d; /* Note face increment in  x */
881           idxLocal[c]  = kg * eplGhost + jg * eprGhost + ig * stag->entriesPerElement + dLocal;
882         }
883         /* Skip back left edge and back face */
884         dLocal += stag->dof[1] + stag->dof[2];
885         /* Down face and down left edge */
886         for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + 2 * stag->dof[2]; ++d, ++dLocal, ++c) {   /* Back left edge */
887           idxGlobal[c] = globalOffset + k * eplNeighbor + j * eprNeighbor + i * entriesPerFace + d; /* Note face increment in x */
888           idxLocal[c]  = kg * eplGhost + jg * eprGhost + ig * stag->entriesPerElement + dLocal;
889         }
890         /* Skip left face and element */
891       }
892       if (extrax) {
893         PetscInt       dLocal;
894         const PetscInt i = endGhostx - startGhostx + startx;
895         ig               = endGhostx;
896         for (d = 0, dLocal = 0; d < stag->dof[0]; ++d, ++dLocal, ++c) {                             /* Vertex */
897           idxGlobal[c] = globalOffset + k * eplNeighbor + j * eprNeighbor + i * entriesPerFace + d; /* Note face increment in x */
898           idxLocal[c]  = kg * eplGhost + jg * eprGhost + ig * stag->entriesPerElement + dLocal;
899         }
900         dLocal += 2 * stag->dof[1] + stag->dof[2];                                                  /* Skip back down edge, back face, and back left edge */
901         for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + stag->dof[2]; ++d, ++dLocal, ++c) {       /* Down left edge */
902           idxGlobal[c] = globalOffset + k * eplNeighbor + j * eprNeighbor + i * entriesPerFace + d; /* Note face increment in x */
903           idxLocal[c]  = kg * eplGhost + jg * eprGhost + ig * stag->entriesPerElement + dLocal;
904         }
905         /* Skip remaining entries */
906       }
907     }
908   }
909   if (extraz) {
910     const PetscInt k = endGhostz - startGhostz + startz;
911     kg               = endGhostz;
912     for (jg = startGhosty; jg < endGhosty; ++jg) {
913       const PetscInt j = jg - startGhosty + starty;
914       for (ig = startGhostx; ig < endGhostx; ++ig) {
915         const PetscInt i = ig - startGhostx + startx;
916         for (d = 0; d < entriesPerFace; ++d, ++c) {
917           idxGlobal[c] = globalOffset + k * eplNeighbor + j * epFaceRow + i * entriesPerFace + d; /* Note face-based x and y increments */
918           idxLocal[c]  = kg * eplGhost + jg * eprGhost + ig * stag->entriesPerElement + d;
919         }
920       }
921       if (extrax) {
922         PetscInt       dLocal;
923         const PetscInt i = endGhostx - startGhostx + startx;
924         ig               = endGhostx;
925         for (d = 0, dLocal = 0; d < stag->dof[0]; ++d, ++dLocal, ++c) {                           /* Vertex */
926           idxGlobal[c] = globalOffset + k * eplNeighbor + j * epFaceRow + i * entriesPerFace + d; /* Note face-based x and y increments */
927           idxLocal[c]  = kg * eplGhost + jg * eprGhost + ig * stag->entriesPerElement + dLocal;
928         }
929         dLocal += stag->dof[1];                                                                   /* Skip back down edge */
930         for (; dLocal < stag->dof[0] + 2 * stag->dof[1]; ++d, ++dLocal, ++c) {                    /* Back left edge */
931           idxGlobal[c] = globalOffset + k * eplNeighbor + j * epFaceRow + i * entriesPerFace + d; /* Note face-based x and y increments */
932           idxLocal[c]  = kg * eplGhost + jg * eprGhost + ig * stag->entriesPerElement + dLocal;
933         }
934         /* Skip the rest */
935       }
936     }
937     if (extray) {
938       const PetscInt j = endGhosty - startGhosty + starty;
939       jg               = endGhosty;
940       for (ig = startGhostx; ig < endGhostx; ++ig) {
941         const PetscInt i = ig - startGhostx + startx;
942         for (d = 0; d < entriesPerEdge; ++d, ++c) {
943           idxGlobal[c] = globalOffset + k * eplNeighbor + j * epFaceRow + i * entriesPerEdge + d; /* Note face-based y increment and edge-based x increment */
944           idxLocal[c]  = kg * eplGhost + jg * eprGhost + ig * stag->entriesPerElement + d;
945         }
946       }
947       if (extrax) {
948         const PetscInt i = endGhostx - startGhostx + startx;
949         ig               = endGhostx;
950         for (d = 0; d < stag->dof[0]; ++d, ++c) {                                                 /* Vertex (only) */
951           idxGlobal[c] = globalOffset + k * eplNeighbor + j * epFaceRow + i * entriesPerEdge + d; /* Note face-based y increment and edge-based x increment */
952           idxLocal[c]  = kg * eplGhost + jg * eprGhost + ig * stag->entriesPerElement + d;
953         }
954       }
955     }
956   }
957   *count = c;
958   PetscFunctionReturn(PETSC_SUCCESS);
959 }
960 
DMStagSetUpBuildScatter_3d(DM dm,const PetscInt * globalOffsets)961 static PetscErrorCode DMStagSetUpBuildScatter_3d(DM dm, const PetscInt *globalOffsets)
962 {
963   DM_Stag *const stag = (DM_Stag *)dm->data;
964   PetscInt       d, ghostOffsetStart[3], ghostOffsetEnd[3], entriesPerCorner, entriesPerEdge, entriesPerFace, entriesToTransferTotal, count, eprGhost, eplGhost;
965   PetscInt      *idxLocal, *idxGlobal;
966   PetscMPIInt    rank;
967   PetscInt       nNeighbors[27][3];
968   PetscBool      star, nextToDummyEnd[3], dummyStart[3], dummyEnd[3];
969 
970   PetscFunctionBegin;
971   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
972   if (stag->stencilType != DMSTAG_STENCIL_NONE && (stag->n[0] < stag->stencilWidth || stag->n[1] < stag->stencilWidth || stag->n[2] < stag->stencilWidth)) {
973     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "DMStag 3d setup does not support local sizes (%" PetscInt_FMT " x %" PetscInt_FMT " x %" PetscInt_FMT ") smaller than the elementwise stencil width (%" PetscInt_FMT ")", stag->n[0], stag->n[1], stag->n[2],
974             stag->stencilWidth);
975   }
976 
977   /* Check stencil type */
978   PetscCheck(stag->stencilType == DMSTAG_STENCIL_NONE || stag->stencilType == DMSTAG_STENCIL_BOX || stag->stencilType == DMSTAG_STENCIL_STAR, PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Unsupported stencil type %s", DMStagStencilTypes[stag->stencilType]);
979   star = (PetscBool)(stag->stencilType == DMSTAG_STENCIL_STAR || stag->stencilType == DMSTAG_STENCIL_NONE);
980 
981   /* Compute numbers of elements on each neighbor */
982   {
983     PetscInt i;
984     for (i = 0; i < 27; ++i) {
985       const PetscInt neighborRank = stag->neighbors[i];
986       if (neighborRank >= 0) { /* note we copy the values for our own rank (neighbor 13) */
987         nNeighbors[i][0] = stag->l[0][neighborRank % stag->nRanks[0]];
988         nNeighbors[i][1] = stag->l[1][neighborRank % (stag->nRanks[0] * stag->nRanks[1]) / stag->nRanks[0]];
989         nNeighbors[i][2] = stag->l[2][neighborRank / (stag->nRanks[0] * stag->nRanks[1])];
990       } /* else leave uninitialized - error if accessed */
991     }
992   }
993 
994   /* These offsets should always be non-negative, and describe how many
995      ghost elements exist at each boundary. These are not always equal to the stencil width,
996      because we may have different numbers of ghost elements at the boundaries. In particular,
997      in the non-periodic casewe always have at least one ghost (dummy) element at the right/top/front. */
998   for (d = 0; d < 3; ++d) ghostOffsetStart[d] = stag->start[d] - stag->startGhost[d];
999   for (d = 0; d < 3; ++d) ghostOffsetEnd[d] = stag->startGhost[d] + stag->nGhost[d] - (stag->start[d] + stag->n[d]);
1000 
1001   /* Determine whether the ghost region includes dummies or not. This is currently
1002      equivalent to having a non-periodic boundary. If not, then
1003      ghostOffset{Start,End}[d] elements correspond to elements on the neighbor.
1004      If true, then
1005      - at the start, there are ghostOffsetStart[d] ghost elements
1006      - at the end, there is a layer of extra "physical" points inside a layer of
1007        ghostOffsetEnd[d] ghost elements
1008      Note that this computation should be updated if any boundary types besides
1009      NONE, GHOSTED, and PERIODIC are supported.  */
1010   for (d = 0; d < 3; ++d) dummyStart[d] = (PetscBool)(stag->firstRank[d] && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);
1011   for (d = 0; d < 3; ++d) dummyEnd[d] = (PetscBool)(stag->lastRank[d] && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);
1012 
1013   /* Convenience variables */
1014   entriesPerFace   = stag->dof[0] + 2 * stag->dof[1] + stag->dof[2];
1015   entriesPerEdge   = stag->dof[0] + stag->dof[1];
1016   entriesPerCorner = stag->dof[0];
1017   for (d = 0; d < 3; ++d) nextToDummyEnd[d] = (PetscBool)(stag->boundaryType[d] != DM_BOUNDARY_PERIODIC && stag->rank[d] == stag->nRanks[d] - 2);
1018   eprGhost = stag->nGhost[0] * stag->entriesPerElement; /* epr = entries per (element) row   */
1019   eplGhost = stag->nGhost[1] * eprGhost;                /* epl = entries per (element) layer */
1020 
1021   /* Compute the number of local entries which correspond to any global entry */
1022   {
1023     PetscInt nNonDummyGhost[3];
1024     for (d = 0; d < 3; ++d) nNonDummyGhost[d] = stag->nGhost[d] - (dummyStart[d] ? ghostOffsetStart[d] : 0) - (dummyEnd[d] ? ghostOffsetEnd[d] : 0);
1025     if (star) {
1026       entriesToTransferTotal = (nNonDummyGhost[0] * stag->n[1] * stag->n[2] + stag->n[0] * nNonDummyGhost[1] * stag->n[2] + stag->n[0] * stag->n[1] * nNonDummyGhost[2] - 2 * (stag->n[0] * stag->n[1] * stag->n[2])) * stag->entriesPerElement +
1027                                (dummyEnd[0] ? (nNonDummyGhost[1] * stag->n[2] + stag->n[1] * nNonDummyGhost[2] - stag->n[1] * stag->n[2]) * entriesPerFace : 0) +
1028                                (dummyEnd[1] ? (nNonDummyGhost[0] * stag->n[2] + stag->n[0] * nNonDummyGhost[2] - stag->n[0] * stag->n[2]) * entriesPerFace : 0) +
1029                                (dummyEnd[2] ? (nNonDummyGhost[0] * stag->n[1] + stag->n[0] * nNonDummyGhost[1] - stag->n[0] * stag->n[1]) * entriesPerFace : 0) + (dummyEnd[0] && dummyEnd[1] ? nNonDummyGhost[2] * entriesPerEdge : 0) + (dummyEnd[2] && dummyEnd[0] ? nNonDummyGhost[1] * entriesPerEdge : 0) + (dummyEnd[1] && dummyEnd[2] ? nNonDummyGhost[0] * entriesPerEdge : 0) + (dummyEnd[0] && dummyEnd[1] && dummyEnd[2] ? entriesPerCorner : 0);
1030     } else {
1031       entriesToTransferTotal = nNonDummyGhost[0] * nNonDummyGhost[1] * nNonDummyGhost[2] * stag->entriesPerElement + (dummyEnd[0] ? nNonDummyGhost[1] * nNonDummyGhost[2] * entriesPerFace : 0) + (dummyEnd[1] ? nNonDummyGhost[2] * nNonDummyGhost[0] * entriesPerFace : 0) + (dummyEnd[2] ? nNonDummyGhost[0] * nNonDummyGhost[1] * entriesPerFace : 0) + (dummyEnd[0] && dummyEnd[1] ? nNonDummyGhost[2] * entriesPerEdge : 0) + (dummyEnd[2] && dummyEnd[0] ? nNonDummyGhost[1] * entriesPerEdge : 0) + (dummyEnd[1] && dummyEnd[2] ? nNonDummyGhost[0] * entriesPerEdge : 0) + (dummyEnd[0] && dummyEnd[1] && dummyEnd[2] ? entriesPerCorner : 0);
1032     }
1033   }
1034 
1035   /* Allocate arrays to populate */
1036   PetscCall(PetscMalloc1(entriesToTransferTotal, &idxLocal));
1037   PetscCall(PetscMalloc1(entriesToTransferTotal, &idxGlobal));
1038 
1039   /* Counts into idxLocal/idxGlobal */
1040   count = 0;
1041 
1042   /*  Loop over each of the 27 neighbor, populating idxLocal and idxGlobal. These
1043       cases are principally distinguished by
1044 
1045       1. The loop bounds (i/ighost,j/jghost,k/kghost)
1046       2. the strides used in the loop body, which depend on whether the current
1047       rank and/or its neighbor are a non-periodic right/top/front boundary, which has additional
1048       points in the global representation.
1049       - If the neighboring rank is a right/top/front boundary,
1050       then eprNeighbor (entries per element row on the neighbor) and/or eplNeighbor (entries per element layer on the neighbor)
1051       are different.
1052       - If this rank is a non-periodic right/top/front boundary (checking entries of stag->lastRank),
1053       there is an extra loop over 1-3 boundary faces)
1054 
1055       Here, we do not include "dummy" dof (points in the local representation which
1056       do not correspond to any global dof). This, and the fact that we iterate over points in terms of
1057       increasing global ordering, are the main two differences from the construction of
1058       the Local-to-global map, which iterates over points in local order, and does include dummy points. */
1059 
1060   /* LEFT DOWN BACK */
1061   if (!star && !dummyStart[0] && !dummyStart[1] && !dummyStart[2]) {
1062     const PetscInt  neighbor    = 0;
1063     const PetscInt  eprNeighbor = stag->entriesPerElement * nNeighbors[neighbor][0];
1064     const PetscInt  eplNeighbor = eprNeighbor * nNeighbors[neighbor][1];
1065     const PetscInt  epFaceRow   = -1;
1066     const PetscInt  start0      = nNeighbors[neighbor][0] - ghostOffsetStart[0];
1067     const PetscInt  start1      = nNeighbors[neighbor][1] - ghostOffsetStart[1];
1068     const PetscInt  start2      = nNeighbors[neighbor][2] - ghostOffsetStart[2];
1069     const PetscInt  startGhost0 = 0;
1070     const PetscInt  startGhost1 = 0;
1071     const PetscInt  startGhost2 = 0;
1072     const PetscInt  endGhost0   = ghostOffsetStart[0];
1073     const PetscInt  endGhost1   = ghostOffsetStart[1];
1074     const PetscInt  endGhost2   = ghostOffsetStart[2];
1075     const PetscBool extra0      = PETSC_FALSE;
1076     const PetscBool extra1      = PETSC_FALSE;
1077     const PetscBool extra2      = PETSC_FALSE;
1078     PetscCall(DMStagSetUpBuildScatterPopulateIdx_3d(stag, &count, idxLocal, idxGlobal, entriesPerEdge, entriesPerFace, eprNeighbor, eplNeighbor, eprGhost, eplGhost, epFaceRow, globalOffsets[stag->neighbors[neighbor]], start0, start1, start2, startGhost0, startGhost1, startGhost2, endGhost0, endGhost1, endGhost2, extra0, extra1, extra2));
1079   }
1080 
1081   /* DOWN BACK */
1082   if (!star && !dummyStart[1] && !dummyStart[2]) {
1083     const PetscInt  neighbor    = 1;
1084     const PetscInt  eprNeighbor = stag->entriesPerElement * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerFace : 0); /* We+neighbor may be a right boundary */
1085     const PetscInt  eplNeighbor = eprNeighbor * nNeighbors[neighbor][1];
1086     const PetscInt  epFaceRow   = -1;
1087     const PetscInt  start0      = 0;
1088     const PetscInt  start1      = nNeighbors[neighbor][1] - ghostOffsetStart[1];
1089     const PetscInt  start2      = nNeighbors[neighbor][2] - ghostOffsetStart[2];
1090     const PetscInt  startGhost0 = ghostOffsetStart[0];
1091     const PetscInt  startGhost1 = 0;
1092     const PetscInt  startGhost2 = 0;
1093     const PetscInt  endGhost0   = stag->nGhost[0] - ghostOffsetEnd[0];
1094     const PetscInt  endGhost1   = ghostOffsetStart[1];
1095     const PetscInt  endGhost2   = ghostOffsetStart[2];
1096     const PetscBool extra0      = dummyEnd[0];
1097     const PetscBool extra1      = PETSC_FALSE;
1098     const PetscBool extra2      = PETSC_FALSE;
1099     PetscCall(DMStagSetUpBuildScatterPopulateIdx_3d(stag, &count, idxLocal, idxGlobal, entriesPerEdge, entriesPerFace, eprNeighbor, eplNeighbor, eprGhost, eplGhost, epFaceRow, globalOffsets[stag->neighbors[neighbor]], start0, start1, start2, startGhost0, startGhost1, startGhost2, endGhost0, endGhost1, endGhost2, extra0, extra1, extra2));
1100   }
1101 
1102   /* RIGHT DOWN BACK */
1103   if (!star && !dummyEnd[0] && !dummyStart[1] && !dummyStart[2]) {
1104     const PetscInt  neighbor    = 2;
1105     const PetscInt  eprNeighbor = stag->entriesPerElement * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerFace : 0); /* Neighbor may be a right boundary */
1106     const PetscInt  eplNeighbor = eprNeighbor * nNeighbors[neighbor][1];
1107     const PetscInt  epFaceRow   = -1;
1108     const PetscInt  start0      = 0;
1109     const PetscInt  start1      = nNeighbors[neighbor][1] - ghostOffsetStart[1];
1110     const PetscInt  start2      = nNeighbors[neighbor][2] - ghostOffsetStart[2];
1111     const PetscInt  startGhost0 = stag->nGhost[0] - ghostOffsetEnd[0];
1112     const PetscInt  startGhost1 = 0;
1113     const PetscInt  startGhost2 = 0;
1114     const PetscInt  endGhost0   = stag->nGhost[0];
1115     const PetscInt  endGhost1   = ghostOffsetStart[1];
1116     const PetscInt  endGhost2   = ghostOffsetStart[2];
1117     const PetscBool extra0      = PETSC_FALSE;
1118     const PetscBool extra1      = PETSC_FALSE;
1119     const PetscBool extra2      = PETSC_FALSE;
1120     PetscCall(DMStagSetUpBuildScatterPopulateIdx_3d(stag, &count, idxLocal, idxGlobal, entriesPerEdge, entriesPerFace, eprNeighbor, eplNeighbor, eprGhost, eplGhost, epFaceRow, globalOffsets[stag->neighbors[neighbor]], start0, start1, start2, startGhost0, startGhost1, startGhost2, endGhost0, endGhost1, endGhost2, extra0, extra1, extra2));
1121   }
1122 
1123   /* LEFT BACK */
1124   if (!star && !dummyStart[0] && !dummyStart[2]) {
1125     const PetscInt  neighbor    = 3;
1126     const PetscInt  eprNeighbor = stag->entriesPerElement * nNeighbors[neighbor][0];
1127     const PetscInt  eplNeighbor = eprNeighbor * nNeighbors[neighbor][1] + (dummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace : 0); /* May be a top boundary */
1128     const PetscInt  epFaceRow   = -1;
1129     const PetscInt  start0      = nNeighbors[neighbor][0] - ghostOffsetStart[0];
1130     const PetscInt  start1      = 0;
1131     const PetscInt  start2      = nNeighbors[neighbor][2] - ghostOffsetStart[2];
1132     const PetscInt  startGhost0 = 0;
1133     const PetscInt  startGhost1 = ghostOffsetStart[1];
1134     const PetscInt  startGhost2 = 0;
1135     const PetscInt  endGhost0   = ghostOffsetStart[0];
1136     const PetscInt  endGhost1   = stag->nGhost[1] - ghostOffsetEnd[1];
1137     const PetscInt  endGhost2   = ghostOffsetStart[2];
1138     const PetscBool extra0      = PETSC_FALSE;
1139     const PetscBool extra1      = dummyEnd[1];
1140     const PetscBool extra2      = PETSC_FALSE;
1141     PetscCall(DMStagSetUpBuildScatterPopulateIdx_3d(stag, &count, idxLocal, idxGlobal, entriesPerEdge, entriesPerFace, eprNeighbor, eplNeighbor, eprGhost, eplGhost, epFaceRow, globalOffsets[stag->neighbors[neighbor]], start0, start1, start2, startGhost0, startGhost1, startGhost2, endGhost0, endGhost1, endGhost2, extra0, extra1, extra2));
1142   }
1143 
1144   /* BACK */
1145   if (!dummyStart[2]) {
1146     const PetscInt  neighbor    = 4;
1147     const PetscInt  eprNeighbor = stag->entriesPerElement * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerFace : 0);                                                    /* We+neighbor may  be a right boundary */
1148     const PetscInt  eplNeighbor = eprNeighbor * nNeighbors[neighbor][1] + (dummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace + (dummyEnd[0] ? entriesPerEdge : 0) : 0); /* We+neighbor may be a top boundary */
1149     const PetscInt  epFaceRow   = -1;
1150     const PetscInt  start0      = 0;
1151     const PetscInt  start1      = 0;
1152     const PetscInt  start2      = nNeighbors[neighbor][2] - ghostOffsetStart[2];
1153     const PetscInt  startGhost0 = ghostOffsetStart[0];
1154     const PetscInt  startGhost1 = ghostOffsetStart[1];
1155     const PetscInt  startGhost2 = 0;
1156     const PetscInt  endGhost0   = stag->nGhost[0] - ghostOffsetEnd[0];
1157     const PetscInt  endGhost1   = stag->nGhost[1] - ghostOffsetEnd[1];
1158     const PetscInt  endGhost2   = ghostOffsetStart[2];
1159     const PetscBool extra0      = dummyEnd[0];
1160     const PetscBool extra1      = dummyEnd[1];
1161     const PetscBool extra2      = PETSC_FALSE;
1162     PetscCall(DMStagSetUpBuildScatterPopulateIdx_3d(stag, &count, idxLocal, idxGlobal, entriesPerEdge, entriesPerFace, eprNeighbor, eplNeighbor, eprGhost, eplGhost, epFaceRow, globalOffsets[stag->neighbors[neighbor]], start0, start1, start2, startGhost0, startGhost1, startGhost2, endGhost0, endGhost1, endGhost2, extra0, extra1, extra2));
1163   }
1164 
1165   /* RIGHT BACK */
1166   if (!star && !dummyEnd[0] && !dummyStart[2]) {
1167     const PetscInt  neighbor    = 5;
1168     const PetscInt  eprNeighbor = stag->entriesPerElement * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerFace : 0);                                                    /* Neighbor may be a right boundary */
1169     const PetscInt  eplNeighbor = eprNeighbor * nNeighbors[neighbor][1] + (dummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace + (nextToDummyEnd[0] ? entriesPerEdge : 0) : 0); /* We and neighbor may be a top boundary */
1170     const PetscInt  epFaceRow   = -1;
1171     const PetscInt  start0      = 0;
1172     const PetscInt  start1      = 0;
1173     const PetscInt  start2      = nNeighbors[neighbor][2] - ghostOffsetStart[2];
1174     const PetscInt  startGhost0 = stag->nGhost[0] - ghostOffsetEnd[0];
1175     const PetscInt  startGhost1 = ghostOffsetStart[1];
1176     const PetscInt  startGhost2 = 0;
1177     const PetscInt  endGhost0   = stag->nGhost[0];
1178     const PetscInt  endGhost1   = stag->nGhost[1] - ghostOffsetEnd[1];
1179     const PetscInt  endGhost2   = ghostOffsetStart[2];
1180     const PetscBool extra0      = PETSC_FALSE;
1181     const PetscBool extra1      = dummyEnd[1];
1182     const PetscBool extra2      = PETSC_FALSE;
1183     PetscCall(DMStagSetUpBuildScatterPopulateIdx_3d(stag, &count, idxLocal, idxGlobal, entriesPerEdge, entriesPerFace, eprNeighbor, eplNeighbor, eprGhost, eplGhost, epFaceRow, globalOffsets[stag->neighbors[neighbor]], start0, start1, start2, startGhost0, startGhost1, startGhost2, endGhost0, endGhost1, endGhost2, extra0, extra1, extra2));
1184   }
1185 
1186   /* LEFT UP BACK */
1187   if (!star && !dummyStart[0] && !dummyEnd[1] && !dummyStart[2]) {
1188     const PetscInt  neighbor    = 6;
1189     const PetscInt  eprNeighbor = stag->entriesPerElement * nNeighbors[neighbor][0];
1190     const PetscInt  eplNeighbor = eprNeighbor * nNeighbors[neighbor][1] + (nextToDummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace : 0); /* Neighbor may be a top boundary */
1191     const PetscInt  epFaceRow   = -1;
1192     const PetscInt  start0      = nNeighbors[neighbor][0] - ghostOffsetStart[0];
1193     const PetscInt  start1      = 0;
1194     const PetscInt  start2      = nNeighbors[neighbor][2] - ghostOffsetStart[2];
1195     const PetscInt  startGhost0 = 0;
1196     const PetscInt  startGhost1 = stag->nGhost[1] - ghostOffsetEnd[1];
1197     const PetscInt  startGhost2 = 0;
1198     const PetscInt  endGhost0   = ghostOffsetStart[0];
1199     const PetscInt  endGhost1   = stag->nGhost[1];
1200     const PetscInt  endGhost2   = ghostOffsetStart[2];
1201     const PetscBool extra0      = PETSC_FALSE;
1202     const PetscBool extra1      = PETSC_FALSE;
1203     const PetscBool extra2      = PETSC_FALSE;
1204     PetscCall(DMStagSetUpBuildScatterPopulateIdx_3d(stag, &count, idxLocal, idxGlobal, entriesPerEdge, entriesPerFace, eprNeighbor, eplNeighbor, eprGhost, eplGhost, epFaceRow, globalOffsets[stag->neighbors[neighbor]], start0, start1, start2, startGhost0, startGhost1, startGhost2, endGhost0, endGhost1, endGhost2, extra0, extra1, extra2));
1205   }
1206 
1207   /* UP BACK */
1208   if (!star && !dummyEnd[1] && !dummyStart[2]) {
1209     const PetscInt  neighbor    = 7;
1210     const PetscInt  eprNeighbor = stag->entriesPerElement * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerFace : 0);                                                          /* We+neighbor may be a right boundary */
1211     const PetscInt  eplNeighbor = eprNeighbor * nNeighbors[neighbor][1] + (nextToDummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace + (dummyEnd[0] ? entriesPerEdge : 0) : 0); /* Neighbor may be a top boundary */
1212     const PetscInt  epFaceRow   = -1;
1213     const PetscInt  start0      = 0;
1214     const PetscInt  start1      = 0;
1215     const PetscInt  start2      = nNeighbors[neighbor][2] - ghostOffsetStart[2];
1216     const PetscInt  startGhost0 = ghostOffsetStart[0];
1217     const PetscInt  startGhost1 = stag->nGhost[1] - ghostOffsetEnd[1];
1218     const PetscInt  startGhost2 = 0;
1219     const PetscInt  endGhost0   = stag->nGhost[0] - ghostOffsetEnd[0];
1220     const PetscInt  endGhost1   = stag->nGhost[1];
1221     const PetscInt  endGhost2   = ghostOffsetStart[2];
1222     const PetscBool extra0      = dummyEnd[0];
1223     const PetscBool extra1      = PETSC_FALSE;
1224     const PetscBool extra2      = PETSC_FALSE;
1225     PetscCall(DMStagSetUpBuildScatterPopulateIdx_3d(stag, &count, idxLocal, idxGlobal, entriesPerEdge, entriesPerFace, eprNeighbor, eplNeighbor, eprGhost, eplGhost, epFaceRow, globalOffsets[stag->neighbors[neighbor]], start0, start1, start2, startGhost0, startGhost1, startGhost2, endGhost0, endGhost1, endGhost2, extra0, extra1, extra2));
1226   }
1227 
1228   /* RIGHT UP BACK */
1229   if (!star && !dummyEnd[0] && !dummyEnd[1] && !dummyStart[2]) {
1230     const PetscInt  neighbor    = 8;
1231     const PetscInt  eprNeighbor = stag->entriesPerElement * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerFace : 0);                                                          /* Neighbor may be a right boundary */
1232     const PetscInt  eplNeighbor = eprNeighbor * nNeighbors[neighbor][1] + (nextToDummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace + (nextToDummyEnd[0] ? entriesPerEdge : 0) : 0); /* Neighbor may be a top boundary */
1233     const PetscInt  epFaceRow   = -1;
1234     const PetscInt  start0      = 0;
1235     const PetscInt  start1      = 0;
1236     const PetscInt  start2      = nNeighbors[neighbor][2] - ghostOffsetStart[2];
1237     const PetscInt  startGhost0 = stag->nGhost[0] - ghostOffsetEnd[0];
1238     const PetscInt  startGhost1 = stag->nGhost[1] - ghostOffsetEnd[1];
1239     const PetscInt  startGhost2 = 0;
1240     const PetscInt  endGhost0   = stag->nGhost[0];
1241     const PetscInt  endGhost1   = stag->nGhost[1];
1242     const PetscInt  endGhost2   = ghostOffsetStart[2];
1243     const PetscBool extra0      = PETSC_FALSE;
1244     const PetscBool extra1      = PETSC_FALSE;
1245     const PetscBool extra2      = PETSC_FALSE;
1246     PetscCall(DMStagSetUpBuildScatterPopulateIdx_3d(stag, &count, idxLocal, idxGlobal, entriesPerEdge, entriesPerFace, eprNeighbor, eplNeighbor, eprGhost, eplGhost, epFaceRow, globalOffsets[stag->neighbors[neighbor]], start0, start1, start2, startGhost0, startGhost1, startGhost2, endGhost0, endGhost1, endGhost2, extra0, extra1, extra2));
1247   }
1248 
1249   /* LEFT DOWN */
1250   if (!star && !dummyStart[0] && !dummyStart[1]) {
1251     const PetscInt  neighbor    = 9;
1252     const PetscInt  eprNeighbor = stag->entriesPerElement * nNeighbors[neighbor][0];
1253     const PetscInt  eplNeighbor = eprNeighbor * nNeighbors[neighbor][1];
1254     const PetscInt  epFaceRow   = entriesPerFace * nNeighbors[neighbor][0]; /* Note that we can't be a right boundary */
1255     const PetscInt  start0      = nNeighbors[neighbor][0] - ghostOffsetStart[0];
1256     const PetscInt  start1      = nNeighbors[neighbor][1] - ghostOffsetStart[1];
1257     const PetscInt  start2      = 0;
1258     const PetscInt  startGhost0 = 0;
1259     const PetscInt  startGhost1 = 0;
1260     const PetscInt  startGhost2 = ghostOffsetStart[2];
1261     const PetscInt  endGhost0   = ghostOffsetStart[0];
1262     const PetscInt  endGhost1   = ghostOffsetStart[1];
1263     const PetscInt  endGhost2   = stag->nGhost[2] - ghostOffsetEnd[2];
1264     const PetscBool extra0      = PETSC_FALSE;
1265     const PetscBool extra1      = PETSC_FALSE;
1266     const PetscBool extra2      = dummyEnd[2];
1267     PetscCall(DMStagSetUpBuildScatterPopulateIdx_3d(stag, &count, idxLocal, idxGlobal, entriesPerEdge, entriesPerFace, eprNeighbor, eplNeighbor, eprGhost, eplGhost, epFaceRow, globalOffsets[stag->neighbors[neighbor]], start0, start1, start2, startGhost0, startGhost1, startGhost2, endGhost0, endGhost1, endGhost2, extra0, extra1, extra2));
1268   }
1269 
1270   /* DOWN */
1271   if (!dummyStart[1]) {
1272     const PetscInt  neighbor    = 10;
1273     const PetscInt  eprNeighbor = stag->entriesPerElement * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerFace : 0); /* We+neighbor may be a right boundary */
1274     const PetscInt  eplNeighbor = eprNeighbor * nNeighbors[neighbor][1];
1275     const PetscInt  epFaceRow   = entriesPerFace * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerEdge : 0); /* We+neighbor may be a right boundary */
1276     const PetscInt  start0      = 0;
1277     const PetscInt  start1      = nNeighbors[neighbor][1] - ghostOffsetStart[1];
1278     const PetscInt  start2      = 0;
1279     const PetscInt  startGhost0 = ghostOffsetStart[0];
1280     const PetscInt  startGhost1 = 0;
1281     const PetscInt  startGhost2 = ghostOffsetStart[2];
1282     const PetscInt  endGhost0   = stag->nGhost[0] - ghostOffsetEnd[0];
1283     const PetscInt  endGhost1   = ghostOffsetStart[1];
1284     const PetscInt  endGhost2   = stag->nGhost[2] - ghostOffsetEnd[2];
1285     const PetscBool extra0      = dummyEnd[0];
1286     const PetscBool extra1      = PETSC_FALSE;
1287     const PetscBool extra2      = dummyEnd[2];
1288     PetscCall(DMStagSetUpBuildScatterPopulateIdx_3d(stag, &count, idxLocal, idxGlobal, entriesPerEdge, entriesPerFace, eprNeighbor, eplNeighbor, eprGhost, eplGhost, epFaceRow, globalOffsets[stag->neighbors[neighbor]], start0, start1, start2, startGhost0, startGhost1, startGhost2, endGhost0, endGhost1, endGhost2, extra0, extra1, extra2));
1289   }
1290 
1291   /* RIGHT DOWN */
1292   if (!star && !dummyEnd[0] && !dummyStart[1]) {
1293     const PetscInt  neighbor    = 11;
1294     const PetscInt  eprNeighbor = stag->entriesPerElement * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerFace : 0); /* Neighbor may be a right boundary */
1295     const PetscInt  eplNeighbor = eprNeighbor * nNeighbors[neighbor][1];
1296     const PetscInt  epFaceRow   = entriesPerFace * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerEdge : 0); /* Neighbor may be a right boundary */
1297     const PetscInt  start0      = 0;
1298     const PetscInt  start1      = nNeighbors[neighbor][1] - ghostOffsetStart[1];
1299     const PetscInt  start2      = 0;
1300     const PetscInt  startGhost0 = stag->nGhost[0] - ghostOffsetEnd[0];
1301     const PetscInt  startGhost1 = 0;
1302     const PetscInt  startGhost2 = ghostOffsetStart[2];
1303     const PetscInt  endGhost0   = stag->nGhost[0];
1304     const PetscInt  endGhost1   = ghostOffsetStart[1];
1305     const PetscInt  endGhost2   = stag->nGhost[2] - ghostOffsetEnd[2];
1306     const PetscBool extra0      = PETSC_FALSE;
1307     const PetscBool extra1      = PETSC_FALSE;
1308     const PetscBool extra2      = dummyEnd[2];
1309     PetscCall(DMStagSetUpBuildScatterPopulateIdx_3d(stag, &count, idxLocal, idxGlobal, entriesPerEdge, entriesPerFace, eprNeighbor, eplNeighbor, eprGhost, eplGhost, epFaceRow, globalOffsets[stag->neighbors[neighbor]], start0, start1, start2, startGhost0, startGhost1, startGhost2, endGhost0, endGhost1, endGhost2, extra0, extra1, extra2));
1310   }
1311 
1312   /* LEFT */
1313   if (!dummyStart[0]) {
1314     const PetscInt  neighbor    = 12;
1315     const PetscInt  eprNeighbor = stag->entriesPerElement * nNeighbors[neighbor][0];
1316     const PetscInt  eplNeighbor = eprNeighbor * nNeighbors[neighbor][1] + (dummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace : 0); /* We+neighbor may be a top boundary */
1317     const PetscInt  epFaceRow   = entriesPerFace * nNeighbors[neighbor][0];
1318     const PetscInt  start0      = nNeighbors[neighbor][0] - ghostOffsetStart[0];
1319     const PetscInt  start1      = 0;
1320     const PetscInt  start2      = 0;
1321     const PetscInt  startGhost0 = 0;
1322     const PetscInt  startGhost1 = ghostOffsetStart[1];
1323     const PetscInt  startGhost2 = ghostOffsetStart[2];
1324     const PetscInt  endGhost0   = ghostOffsetStart[0];
1325     const PetscInt  endGhost1   = stag->nGhost[1] - ghostOffsetEnd[1];
1326     const PetscInt  endGhost2   = stag->nGhost[2] - ghostOffsetEnd[2];
1327     const PetscBool extra0      = PETSC_FALSE;
1328     const PetscBool extra1      = dummyEnd[1];
1329     const PetscBool extra2      = dummyEnd[2];
1330     PetscCall(DMStagSetUpBuildScatterPopulateIdx_3d(stag, &count, idxLocal, idxGlobal, entriesPerEdge, entriesPerFace, eprNeighbor, eplNeighbor, eprGhost, eplGhost, epFaceRow, globalOffsets[stag->neighbors[neighbor]], start0, start1, start2, startGhost0, startGhost1, startGhost2, endGhost0, endGhost1, endGhost2, extra0, extra1, extra2));
1331   }
1332 
1333   /* (HERE) */
1334   {
1335     const PetscInt  neighbor    = 13;
1336     const PetscInt  eprNeighbor = stag->entriesPerElement * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerFace : 0);                                                    /* We may be a right boundary */
1337     const PetscInt  eplNeighbor = eprNeighbor * nNeighbors[neighbor][1] + (dummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace + (dummyEnd[0] ? entriesPerEdge : 0) : 0); /* We may be a top boundary */
1338     const PetscInt  epFaceRow   = entriesPerFace * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerEdge : 0);                                                             /* We may be a right boundary */
1339     const PetscInt  start0      = 0;
1340     const PetscInt  start1      = 0;
1341     const PetscInt  start2      = 0;
1342     const PetscInt  startGhost0 = ghostOffsetStart[0];
1343     const PetscInt  startGhost1 = ghostOffsetStart[1];
1344     const PetscInt  startGhost2 = ghostOffsetStart[2];
1345     const PetscInt  endGhost0   = stag->nGhost[0] - ghostOffsetEnd[0];
1346     const PetscInt  endGhost1   = stag->nGhost[1] - ghostOffsetEnd[1];
1347     const PetscInt  endGhost2   = stag->nGhost[2] - ghostOffsetEnd[2];
1348     const PetscBool extra0      = dummyEnd[0];
1349     const PetscBool extra1      = dummyEnd[1];
1350     const PetscBool extra2      = dummyEnd[2];
1351     PetscCall(DMStagSetUpBuildScatterPopulateIdx_3d(stag, &count, idxLocal, idxGlobal, entriesPerEdge, entriesPerFace, eprNeighbor, eplNeighbor, eprGhost, eplGhost, epFaceRow, globalOffsets[stag->neighbors[neighbor]], start0, start1, start2, startGhost0, startGhost1, startGhost2, endGhost0, endGhost1, endGhost2, extra0, extra1, extra2));
1352   }
1353 
1354   /* RIGHT */
1355   if (!dummyEnd[0]) {
1356     const PetscInt  neighbor    = 14;
1357     const PetscInt  eprNeighbor = stag->entriesPerElement * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerFace : 0);                                                    /* Neighbor may be a right boundary */
1358     const PetscInt  eplNeighbor = eprNeighbor * nNeighbors[neighbor][1] + (dummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace + (nextToDummyEnd[0] ? entriesPerEdge : 0) : 0); /* We and neighbor may be a top boundary */
1359     const PetscInt  epFaceRow   = entriesPerFace * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerEdge : 0);                                                             /* Neighbor may be a right boundary */
1360     const PetscInt  start0      = 0;
1361     const PetscInt  start1      = 0;
1362     const PetscInt  start2      = 0;
1363     const PetscInt  startGhost0 = stag->nGhost[0] - ghostOffsetEnd[0];
1364     const PetscInt  startGhost1 = ghostOffsetStart[1];
1365     const PetscInt  startGhost2 = ghostOffsetStart[2];
1366     const PetscInt  endGhost0   = stag->nGhost[0];
1367     const PetscInt  endGhost1   = stag->nGhost[1] - ghostOffsetEnd[1];
1368     const PetscInt  endGhost2   = stag->nGhost[2] - ghostOffsetEnd[2];
1369     const PetscBool extra0      = PETSC_FALSE;
1370     const PetscBool extra1      = dummyEnd[1];
1371     const PetscBool extra2      = dummyEnd[2];
1372     PetscCall(DMStagSetUpBuildScatterPopulateIdx_3d(stag, &count, idxLocal, idxGlobal, entriesPerEdge, entriesPerFace, eprNeighbor, eplNeighbor, eprGhost, eplGhost, epFaceRow, globalOffsets[stag->neighbors[neighbor]], start0, start1, start2, startGhost0, startGhost1, startGhost2, endGhost0, endGhost1, endGhost2, extra0, extra1, extra2));
1373   }
1374 
1375   /* LEFT UP */
1376   if (!star && !dummyStart[0] && !dummyEnd[1]) {
1377     const PetscInt  neighbor    = 15;
1378     const PetscInt  eprNeighbor = stag->entriesPerElement * nNeighbors[neighbor][0];
1379     const PetscInt  eplNeighbor = eprNeighbor * nNeighbors[neighbor][1] + (nextToDummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace : 0); /* Neighbor may be a top boundary */
1380     const PetscInt  epFaceRow   = entriesPerFace * nNeighbors[neighbor][0];
1381     const PetscInt  start0      = nNeighbors[neighbor][0] - ghostOffsetStart[0];
1382     const PetscInt  start1      = 0;
1383     const PetscInt  start2      = 0;
1384     const PetscInt  startGhost0 = 0;
1385     const PetscInt  startGhost1 = stag->nGhost[1] - ghostOffsetEnd[1];
1386     const PetscInt  startGhost2 = ghostOffsetStart[2];
1387     const PetscInt  endGhost0   = ghostOffsetStart[0];
1388     const PetscInt  endGhost1   = stag->nGhost[1];
1389     const PetscInt  endGhost2   = stag->nGhost[2] - ghostOffsetEnd[2];
1390     const PetscBool extra0      = PETSC_FALSE;
1391     const PetscBool extra1      = PETSC_FALSE;
1392     const PetscBool extra2      = dummyEnd[2];
1393     PetscCall(DMStagSetUpBuildScatterPopulateIdx_3d(stag, &count, idxLocal, idxGlobal, entriesPerEdge, entriesPerFace, eprNeighbor, eplNeighbor, eprGhost, eplGhost, epFaceRow, globalOffsets[stag->neighbors[neighbor]], start0, start1, start2, startGhost0, startGhost1, startGhost2, endGhost0, endGhost1, endGhost2, extra0, extra1, extra2));
1394   }
1395 
1396   /* UP */
1397   if (!dummyEnd[1]) {
1398     const PetscInt  neighbor    = 16;
1399     const PetscInt  eprNeighbor = stag->entriesPerElement * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerFace : 0);                                                          /* We+neighbor may be a right boundary */
1400     const PetscInt  eplNeighbor = eprNeighbor * nNeighbors[neighbor][1] + (nextToDummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace + (dummyEnd[0] ? entriesPerEdge : 0) : 0); /* Neighbor may be a top boundary */
1401     const PetscInt  epFaceRow   = entriesPerFace * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerEdge : 0);                                                                   /* We+neighbor may be a right boundary */
1402     const PetscInt  start0      = 0;
1403     const PetscInt  start1      = 0;
1404     const PetscInt  start2      = 0;
1405     const PetscInt  startGhost0 = ghostOffsetStart[0];
1406     const PetscInt  startGhost1 = stag->nGhost[1] - ghostOffsetEnd[1];
1407     const PetscInt  startGhost2 = ghostOffsetStart[2];
1408     const PetscInt  endGhost0   = stag->nGhost[0] - ghostOffsetEnd[0];
1409     const PetscInt  endGhost1   = stag->nGhost[1];
1410     const PetscInt  endGhost2   = stag->nGhost[2] - ghostOffsetEnd[2];
1411     const PetscBool extra0      = dummyEnd[0];
1412     const PetscBool extra1      = PETSC_FALSE;
1413     const PetscBool extra2      = dummyEnd[2];
1414     PetscCall(DMStagSetUpBuildScatterPopulateIdx_3d(stag, &count, idxLocal, idxGlobal, entriesPerEdge, entriesPerFace, eprNeighbor, eplNeighbor, eprGhost, eplGhost, epFaceRow, globalOffsets[stag->neighbors[neighbor]], start0, start1, start2, startGhost0, startGhost1, startGhost2, endGhost0, endGhost1, endGhost2, extra0, extra1, extra2));
1415   }
1416 
1417   /* RIGHT UP */
1418   if (!star && !dummyEnd[0] && !dummyEnd[1]) {
1419     const PetscInt  neighbor    = 17;
1420     const PetscInt  eprNeighbor = stag->entriesPerElement * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerFace : 0);                                                          /* Neighbor may be a right boundary */
1421     const PetscInt  eplNeighbor = eprNeighbor * nNeighbors[neighbor][1] + (nextToDummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace + (nextToDummyEnd[0] ? entriesPerEdge : 0) : 0); /* Neighbor may be a top boundary */
1422     const PetscInt  epFaceRow   = entriesPerFace * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerEdge : 0);                                                                   /* Neighbor may be a right boundary */
1423     const PetscInt  start0      = 0;
1424     const PetscInt  start1      = 0;
1425     const PetscInt  start2      = 0;
1426     const PetscInt  startGhost0 = stag->nGhost[0] - ghostOffsetEnd[0];
1427     const PetscInt  startGhost1 = stag->nGhost[1] - ghostOffsetEnd[1];
1428     const PetscInt  startGhost2 = ghostOffsetStart[2];
1429     const PetscInt  endGhost0   = stag->nGhost[0];
1430     const PetscInt  endGhost1   = stag->nGhost[1];
1431     const PetscInt  endGhost2   = stag->nGhost[2] - ghostOffsetEnd[2];
1432     const PetscBool extra0      = PETSC_FALSE;
1433     const PetscBool extra1      = PETSC_FALSE;
1434     const PetscBool extra2      = dummyEnd[2];
1435     PetscCall(DMStagSetUpBuildScatterPopulateIdx_3d(stag, &count, idxLocal, idxGlobal, entriesPerEdge, entriesPerFace, eprNeighbor, eplNeighbor, eprGhost, eplGhost, epFaceRow, globalOffsets[stag->neighbors[neighbor]], start0, start1, start2, startGhost0, startGhost1, startGhost2, endGhost0, endGhost1, endGhost2, extra0, extra1, extra2));
1436   }
1437 
1438   /* LEFT DOWN FRONT */
1439   if (!star && !dummyStart[0] && !dummyStart[1] && !dummyEnd[2]) {
1440     const PetscInt  neighbor    = 18;
1441     const PetscInt  eprNeighbor = stag->entriesPerElement * nNeighbors[neighbor][0];
1442     const PetscInt  eplNeighbor = eprNeighbor * nNeighbors[neighbor][1];
1443     const PetscInt  epFaceRow   = -1;
1444     const PetscInt  start0      = nNeighbors[neighbor][0] - ghostOffsetStart[0];
1445     const PetscInt  start1      = nNeighbors[neighbor][1] - ghostOffsetStart[1];
1446     const PetscInt  start2      = 0;
1447     const PetscInt  startGhost0 = 0;
1448     const PetscInt  startGhost1 = 0;
1449     const PetscInt  startGhost2 = stag->nGhost[2] - ghostOffsetEnd[2];
1450     const PetscInt  endGhost0   = ghostOffsetStart[0];
1451     const PetscInt  endGhost1   = ghostOffsetStart[1];
1452     const PetscInt  endGhost2   = stag->nGhost[2];
1453     const PetscBool extra0      = PETSC_FALSE;
1454     const PetscBool extra1      = PETSC_FALSE;
1455     const PetscBool extra2      = PETSC_FALSE;
1456     PetscCall(DMStagSetUpBuildScatterPopulateIdx_3d(stag, &count, idxLocal, idxGlobal, entriesPerEdge, entriesPerFace, eprNeighbor, eplNeighbor, eprGhost, eplGhost, epFaceRow, globalOffsets[stag->neighbors[neighbor]], start0, start1, start2, startGhost0, startGhost1, startGhost2, endGhost0, endGhost1, endGhost2, extra0, extra1, extra2));
1457   }
1458 
1459   /* DOWN FRONT */
1460   if (!star && !dummyStart[1] && !dummyEnd[2]) {
1461     const PetscInt  neighbor    = 19;
1462     const PetscInt  eprNeighbor = stag->entriesPerElement * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerFace : 0); /* We+neighbor may be a right boundary */
1463     const PetscInt  eplNeighbor = eprNeighbor * nNeighbors[neighbor][1];
1464     const PetscInt  epFaceRow   = -1;
1465     const PetscInt  start0      = 0;
1466     const PetscInt  start1      = nNeighbors[neighbor][1] - ghostOffsetStart[1];
1467     const PetscInt  start2      = 0;
1468     const PetscInt  startGhost0 = ghostOffsetStart[0];
1469     const PetscInt  startGhost1 = 0;
1470     const PetscInt  startGhost2 = stag->nGhost[2] - ghostOffsetEnd[2];
1471     const PetscInt  endGhost0   = stag->nGhost[0] - ghostOffsetEnd[0];
1472     const PetscInt  endGhost1   = ghostOffsetStart[1];
1473     const PetscInt  endGhost2   = stag->nGhost[2];
1474     const PetscBool extra0      = dummyEnd[0];
1475     const PetscBool extra1      = PETSC_FALSE;
1476     const PetscBool extra2      = PETSC_FALSE;
1477     PetscCall(DMStagSetUpBuildScatterPopulateIdx_3d(stag, &count, idxLocal, idxGlobal, entriesPerEdge, entriesPerFace, eprNeighbor, eplNeighbor, eprGhost, eplGhost, epFaceRow, globalOffsets[stag->neighbors[neighbor]], start0, start1, start2, startGhost0, startGhost1, startGhost2, endGhost0, endGhost1, endGhost2, extra0, extra1, extra2));
1478   }
1479 
1480   /* RIGHT DOWN FRONT */
1481   if (!star && !dummyEnd[0] && !dummyStart[1] && !dummyEnd[2]) {
1482     const PetscInt  neighbor    = 20;
1483     const PetscInt  eprNeighbor = stag->entriesPerElement * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerFace : 0); /* Neighbor may be a right boundary */
1484     const PetscInt  eplNeighbor = eprNeighbor * nNeighbors[neighbor][1];
1485     const PetscInt  epFaceRow   = -1;
1486     const PetscInt  start0      = 0;
1487     const PetscInt  start1      = nNeighbors[neighbor][1] - ghostOffsetStart[1];
1488     const PetscInt  start2      = 0;
1489     const PetscInt  startGhost0 = stag->nGhost[0] - ghostOffsetEnd[0];
1490     const PetscInt  startGhost1 = 0;
1491     const PetscInt  startGhost2 = stag->nGhost[2] - ghostOffsetEnd[2];
1492     const PetscInt  endGhost0   = stag->nGhost[0];
1493     const PetscInt  endGhost1   = ghostOffsetStart[1];
1494     const PetscInt  endGhost2   = stag->nGhost[2];
1495     const PetscBool extra0      = PETSC_FALSE;
1496     const PetscBool extra1      = PETSC_FALSE;
1497     const PetscBool extra2      = PETSC_FALSE;
1498     PetscCall(DMStagSetUpBuildScatterPopulateIdx_3d(stag, &count, idxLocal, idxGlobal, entriesPerEdge, entriesPerFace, eprNeighbor, eplNeighbor, eprGhost, eplGhost, epFaceRow, globalOffsets[stag->neighbors[neighbor]], start0, start1, start2, startGhost0, startGhost1, startGhost2, endGhost0, endGhost1, endGhost2, extra0, extra1, extra2));
1499   }
1500 
1501   /* LEFT FRONT */
1502   if (!star && !dummyStart[0] && !dummyEnd[2]) {
1503     const PetscInt  neighbor    = 21;
1504     const PetscInt  eprNeighbor = stag->entriesPerElement * nNeighbors[neighbor][0];
1505     const PetscInt  eplNeighbor = eprNeighbor * nNeighbors[neighbor][1] + (dummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace : 0); /* We+neighbor may be a top boundary */
1506     const PetscInt  epFaceRow   = -1;
1507     const PetscInt  start0      = nNeighbors[neighbor][0] - ghostOffsetStart[0];
1508     const PetscInt  start1      = 0;
1509     const PetscInt  start2      = 0;
1510     const PetscInt  startGhost0 = 0;
1511     const PetscInt  startGhost1 = ghostOffsetStart[1];
1512     const PetscInt  startGhost2 = stag->nGhost[2] - ghostOffsetEnd[2];
1513     const PetscInt  endGhost0   = ghostOffsetStart[0];
1514     const PetscInt  endGhost1   = stag->nGhost[1] - ghostOffsetEnd[1];
1515     const PetscInt  endGhost2   = stag->nGhost[2];
1516     const PetscBool extra0      = PETSC_FALSE;
1517     const PetscBool extra1      = dummyEnd[1];
1518     const PetscBool extra2      = PETSC_FALSE;
1519     PetscCall(DMStagSetUpBuildScatterPopulateIdx_3d(stag, &count, idxLocal, idxGlobal, entriesPerEdge, entriesPerFace, eprNeighbor, eplNeighbor, eprGhost, eplGhost, epFaceRow, globalOffsets[stag->neighbors[neighbor]], start0, start1, start2, startGhost0, startGhost1, startGhost2, endGhost0, endGhost1, endGhost2, extra0, extra1, extra2));
1520   }
1521 
1522   /* FRONT */
1523   if (!dummyEnd[2]) {
1524     const PetscInt  neighbor    = 22;
1525     const PetscInt  eprNeighbor = stag->entriesPerElement * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerFace : 0);                                                    /* neighbor is a right boundary if we are*/
1526     const PetscInt  eplNeighbor = eprNeighbor * nNeighbors[neighbor][1] + (dummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace + (dummyEnd[0] ? entriesPerEdge : 0) : 0); /* May be a top boundary */
1527     const PetscInt  epFaceRow   = -1;
1528     const PetscInt  start0      = 0;
1529     const PetscInt  start1      = 0;
1530     const PetscInt  start2      = 0;
1531     const PetscInt  startGhost0 = ghostOffsetStart[0];
1532     const PetscInt  startGhost1 = ghostOffsetStart[1];
1533     const PetscInt  startGhost2 = stag->nGhost[2] - ghostOffsetEnd[2];
1534     const PetscInt  endGhost0   = stag->nGhost[0] - ghostOffsetEnd[0];
1535     const PetscInt  endGhost1   = stag->nGhost[1] - ghostOffsetEnd[1];
1536     const PetscInt  endGhost2   = stag->nGhost[2];
1537     const PetscBool extra0      = dummyEnd[0];
1538     const PetscBool extra1      = dummyEnd[1];
1539     const PetscBool extra2      = PETSC_FALSE;
1540     PetscCall(DMStagSetUpBuildScatterPopulateIdx_3d(stag, &count, idxLocal, idxGlobal, entriesPerEdge, entriesPerFace, eprNeighbor, eplNeighbor, eprGhost, eplGhost, epFaceRow, globalOffsets[stag->neighbors[neighbor]], start0, start1, start2, startGhost0, startGhost1, startGhost2, endGhost0, endGhost1, endGhost2, extra0, extra1, extra2));
1541   }
1542 
1543   /* RIGHT FRONT */
1544   if (!star && !dummyEnd[0] && !dummyEnd[2]) {
1545     const PetscInt  neighbor    = 23;
1546     const PetscInt  eprNeighbor = stag->entriesPerElement * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerFace : 0);                                                    /* Neighbor may be a right boundary */
1547     const PetscInt  eplNeighbor = eprNeighbor * nNeighbors[neighbor][1] + (dummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace + (nextToDummyEnd[0] ? entriesPerEdge : 0) : 0); /* We and neighbor may be a top boundary */
1548     const PetscInt  epFaceRow   = -1;
1549     const PetscInt  start0      = 0;
1550     const PetscInt  start1      = 0;
1551     const PetscInt  start2      = 0;
1552     const PetscInt  startGhost0 = stag->nGhost[0] - ghostOffsetEnd[0];
1553     const PetscInt  startGhost1 = ghostOffsetStart[1];
1554     const PetscInt  startGhost2 = stag->nGhost[2] - ghostOffsetEnd[2];
1555     const PetscInt  endGhost0   = stag->nGhost[0];
1556     const PetscInt  endGhost1   = stag->nGhost[1] - ghostOffsetEnd[1];
1557     const PetscInt  endGhost2   = stag->nGhost[2];
1558     const PetscBool extra0      = PETSC_FALSE;
1559     const PetscBool extra1      = dummyEnd[1];
1560     const PetscBool extra2      = PETSC_FALSE;
1561     PetscCall(DMStagSetUpBuildScatterPopulateIdx_3d(stag, &count, idxLocal, idxGlobal, entriesPerEdge, entriesPerFace, eprNeighbor, eplNeighbor, eprGhost, eplGhost, epFaceRow, globalOffsets[stag->neighbors[neighbor]], start0, start1, start2, startGhost0, startGhost1, startGhost2, endGhost0, endGhost1, endGhost2, extra0, extra1, extra2));
1562   }
1563 
1564   /* LEFT UP FRONT */
1565   if (!star && !dummyStart[0] && !dummyEnd[1] && !dummyEnd[2]) {
1566     const PetscInt  neighbor    = 24;
1567     const PetscInt  eprNeighbor = stag->entriesPerElement * nNeighbors[neighbor][0];
1568     const PetscInt  eplNeighbor = eprNeighbor * nNeighbors[neighbor][1] + (nextToDummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace : 0); /* Neighbor may be a top boundary */
1569     const PetscInt  epFaceRow   = -1;
1570     const PetscInt  start0      = nNeighbors[neighbor][0] - ghostOffsetStart[0];
1571     const PetscInt  start1      = 0;
1572     const PetscInt  start2      = 0;
1573     const PetscInt  startGhost0 = 0;
1574     const PetscInt  startGhost1 = stag->nGhost[1] - ghostOffsetEnd[1];
1575     const PetscInt  startGhost2 = stag->nGhost[2] - ghostOffsetEnd[2];
1576     const PetscInt  endGhost0   = ghostOffsetStart[0];
1577     const PetscInt  endGhost1   = stag->nGhost[1];
1578     const PetscInt  endGhost2   = stag->nGhost[2];
1579     const PetscBool extra0      = PETSC_FALSE;
1580     const PetscBool extra1      = PETSC_FALSE;
1581     const PetscBool extra2      = PETSC_FALSE;
1582     PetscCall(DMStagSetUpBuildScatterPopulateIdx_3d(stag, &count, idxLocal, idxGlobal, entriesPerEdge, entriesPerFace, eprNeighbor, eplNeighbor, eprGhost, eplGhost, epFaceRow, globalOffsets[stag->neighbors[neighbor]], start0, start1, start2, startGhost0, startGhost1, startGhost2, endGhost0, endGhost1, endGhost2, extra0, extra1, extra2));
1583   }
1584 
1585   /* UP FRONT */
1586   if (!star && !dummyEnd[1] && !dummyEnd[2]) {
1587     const PetscInt  neighbor    = 25;
1588     const PetscInt  eprNeighbor = stag->entriesPerElement * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerFace : 0);                                                          /* We+neighbor may be a right boundary */
1589     const PetscInt  eplNeighbor = eprNeighbor * nNeighbors[neighbor][1] + (nextToDummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace + (dummyEnd[0] ? entriesPerEdge : 0) : 0); /* Neighbor may be a top boundary */
1590     const PetscInt  epFaceRow   = -1;
1591     const PetscInt  start0      = 0;
1592     const PetscInt  start1      = 0;
1593     const PetscInt  start2      = 0;
1594     const PetscInt  startGhost0 = ghostOffsetStart[0];
1595     const PetscInt  startGhost1 = stag->nGhost[1] - ghostOffsetEnd[1];
1596     const PetscInt  startGhost2 = stag->nGhost[2] - ghostOffsetEnd[2];
1597     const PetscInt  endGhost0   = stag->nGhost[0] - ghostOffsetEnd[0];
1598     const PetscInt  endGhost1   = stag->nGhost[1];
1599     const PetscInt  endGhost2   = stag->nGhost[2];
1600     const PetscBool extra0      = dummyEnd[0];
1601     const PetscBool extra1      = PETSC_FALSE;
1602     const PetscBool extra2      = PETSC_FALSE;
1603     PetscCall(DMStagSetUpBuildScatterPopulateIdx_3d(stag, &count, idxLocal, idxGlobal, entriesPerEdge, entriesPerFace, eprNeighbor, eplNeighbor, eprGhost, eplGhost, epFaceRow, globalOffsets[stag->neighbors[neighbor]], start0, start1, start2, startGhost0, startGhost1, startGhost2, endGhost0, endGhost1, endGhost2, extra0, extra1, extra2));
1604   }
1605 
1606   /* RIGHT UP FRONT */
1607   if (!star && !dummyEnd[0] && !dummyEnd[1] && !dummyEnd[2]) {
1608     const PetscInt  neighbor    = 26;
1609     const PetscInt  eprNeighbor = stag->entriesPerElement * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerFace : 0);                                                          /* Neighbor may be a right boundary */
1610     const PetscInt  eplNeighbor = eprNeighbor * nNeighbors[neighbor][1] + (nextToDummyEnd[1] ? nNeighbors[neighbor][0] * entriesPerFace + (nextToDummyEnd[0] ? entriesPerEdge : 0) : 0); /* Neighbor may be a top boundary */
1611     const PetscInt  epFaceRow   = -1;
1612     const PetscInt  start0      = 0;
1613     const PetscInt  start1      = 0;
1614     const PetscInt  start2      = 0;
1615     const PetscInt  startGhost0 = stag->nGhost[0] - ghostOffsetEnd[0];
1616     const PetscInt  startGhost1 = stag->nGhost[1] - ghostOffsetEnd[1];
1617     const PetscInt  startGhost2 = stag->nGhost[2] - ghostOffsetEnd[2];
1618     const PetscInt  endGhost0   = stag->nGhost[0];
1619     const PetscInt  endGhost1   = stag->nGhost[1];
1620     const PetscInt  endGhost2   = stag->nGhost[2];
1621     const PetscBool extra0      = PETSC_FALSE;
1622     const PetscBool extra1      = PETSC_FALSE;
1623     const PetscBool extra2      = PETSC_FALSE;
1624     PetscCall(DMStagSetUpBuildScatterPopulateIdx_3d(stag, &count, idxLocal, idxGlobal, entriesPerEdge, entriesPerFace, eprNeighbor, eplNeighbor, eprGhost, eplGhost, epFaceRow, globalOffsets[stag->neighbors[neighbor]], start0, start1, start2, startGhost0, startGhost1, startGhost2, endGhost0, endGhost1, endGhost2, extra0, extra1, extra2));
1625   }
1626 
1627   PetscCheck(count == entriesToTransferTotal, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of entries computed in gtol (%" PetscInt_FMT ") is not as expected (%" PetscInt_FMT ")", count, entriesToTransferTotal);
1628 
1629   /* Create stag->gtol. The order is computed as PETSc ordering, and doesn't include dummy entries */
1630   {
1631     Vec local, global;
1632     IS  isLocal, isGlobal;
1633     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)dm), entriesToTransferTotal, idxLocal, PETSC_OWN_POINTER, &isLocal));
1634     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)dm), entriesToTransferTotal, idxGlobal, PETSC_OWN_POINTER, &isGlobal));
1635     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)dm), 1, stag->entries, PETSC_DECIDE, NULL, &global));
1636     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, PetscMax(stag->entriesPerElement, 1), stag->entriesGhost, NULL, &local));
1637     PetscCall(VecScatterCreate(global, isGlobal, local, isLocal, &stag->gtol));
1638     PetscCall(VecDestroy(&global));
1639     PetscCall(VecDestroy(&local));
1640     PetscCall(ISDestroy(&isLocal));  /* frees idxLocal */
1641     PetscCall(ISDestroy(&isGlobal)); /* free idxGlobal */
1642   }
1643   PetscFunctionReturn(PETSC_SUCCESS);
1644 }
1645 
1646 /* Note: this function assumes that DMBoundary types of none, ghosted, and periodic are the only ones of interest.
1647 Adding support for others should be done very carefully.  */
DMStagSetUpBuildL2G_3d(DM dm,const PetscInt * globalOffsets)1648 static PetscErrorCode DMStagSetUpBuildL2G_3d(DM dm, const PetscInt *globalOffsets)
1649 {
1650   const DM_Stag *const stag = (DM_Stag *)dm->data;
1651   PetscInt            *idxGlobalAll;
1652   PetscInt             d, count, ighost, jghost, kghost, ghostOffsetStart[3], ghostOffsetEnd[3], entriesPerFace, entriesPerEdge;
1653   PetscInt             nNeighbors[27][3], eprNeighbor[27], eplNeighbor[27], globalOffsetNeighbor[27];
1654   PetscBool            nextToDummyEnd[3], dummyStart[3], dummyEnd[3], star;
1655 
1656   PetscFunctionBegin;
1657   /* Check stencil type */
1658   PetscCheck(stag->stencilType == DMSTAG_STENCIL_NONE || stag->stencilType == DMSTAG_STENCIL_BOX || stag->stencilType == DMSTAG_STENCIL_STAR, PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Unsupported stencil type %s", DMStagStencilTypes[stag->stencilType]);
1659   star = (PetscBool)(stag->stencilType == DMSTAG_STENCIL_STAR || stag->stencilType == DMSTAG_STENCIL_NONE);
1660 
1661   /* Convenience variables */
1662   entriesPerFace = stag->dof[0] + 2 * stag->dof[1] + stag->dof[2];
1663   entriesPerEdge = stag->dof[0] + stag->dof[1];
1664   for (d = 0; d < 3; ++d) nextToDummyEnd[d] = (PetscBool)(stag->boundaryType[d] != DM_BOUNDARY_PERIODIC && stag->rank[d] == stag->nRanks[d] - 2);
1665 
1666   /* Ghost offsets (may not be the ghost width, since we always have at least one ghost element on the right/top/front) */
1667   for (d = 0; d < 3; ++d) ghostOffsetStart[d] = stag->start[d] - stag->startGhost[d];
1668   for (d = 0; d < 3; ++d) ghostOffsetEnd[d] = stag->startGhost[d] + stag->nGhost[d] - (stag->start[d] + stag->n[d]);
1669 
1670   /* Whether the ghost region includes dummies. Currently equivalent to being a non-periodic boundary. */
1671   for (d = 0; d < 3; ++d) dummyStart[d] = (PetscBool)(stag->firstRank[d] && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);
1672   for (d = 0; d < 3; ++d) dummyEnd[d] = (PetscBool)(stag->lastRank[d] && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);
1673 
1674   /* Compute numbers of elements on each neighbor  and offset*/
1675   {
1676     PetscInt i;
1677     for (i = 0; i < 27; ++i) {
1678       const PetscInt neighborRank = stag->neighbors[i];
1679       if (neighborRank >= 0) { /* note we copy the values for our own rank (neighbor 13) */
1680         nNeighbors[i][0]        = stag->l[0][neighborRank % stag->nRanks[0]];
1681         nNeighbors[i][1]        = stag->l[1][neighborRank % (stag->nRanks[0] * stag->nRanks[1]) / stag->nRanks[0]];
1682         nNeighbors[i][2]        = stag->l[2][neighborRank / (stag->nRanks[0] * stag->nRanks[1])];
1683         globalOffsetNeighbor[i] = globalOffsets[stag->neighbors[i]];
1684       } /* else leave uninitialized - error if accessed */
1685     }
1686   }
1687 
1688   /* Precompute elements per row and layer on neighbor (zero unused) */
1689   PetscCall(PetscMemzero(eprNeighbor, sizeof(eprNeighbor)));
1690   PetscCall(PetscMemzero(eplNeighbor, sizeof(eplNeighbor)));
1691   if (stag->neighbors[0] >= 0) {
1692     eprNeighbor[0] = stag->entriesPerElement * nNeighbors[0][0];
1693     eplNeighbor[0] = eprNeighbor[0] * nNeighbors[0][1];
1694   }
1695   if (stag->neighbors[1] >= 0) {
1696     eprNeighbor[1] = stag->entriesPerElement * nNeighbors[1][0] + (dummyEnd[0] ? entriesPerFace : 0); /* We+neighbor may be a right boundary */
1697     eplNeighbor[1] = eprNeighbor[1] * nNeighbors[1][1];
1698   }
1699   if (stag->neighbors[2] >= 0) {
1700     eprNeighbor[2] = stag->entriesPerElement * nNeighbors[2][0] + (nextToDummyEnd[0] ? entriesPerFace : 0); /* Neighbor may be a right boundary */
1701     eplNeighbor[2] = eprNeighbor[2] * nNeighbors[2][1];
1702   }
1703   if (stag->neighbors[3] >= 0) {
1704     eprNeighbor[3] = stag->entriesPerElement * nNeighbors[3][0];
1705     eplNeighbor[3] = eprNeighbor[3] * nNeighbors[3][1] + (dummyEnd[1] ? nNeighbors[3][0] * entriesPerFace : 0); /* May be a top boundary */
1706   }
1707   if (stag->neighbors[4] >= 0) {
1708     eprNeighbor[4] = stag->entriesPerElement * nNeighbors[4][0] + (dummyEnd[0] ? entriesPerFace : 0);                                                /* We+neighbor may  be a right boundary */
1709     eplNeighbor[4] = eprNeighbor[4] * nNeighbors[4][1] + (dummyEnd[1] ? nNeighbors[4][0] * entriesPerFace + (dummyEnd[0] ? entriesPerEdge : 0) : 0); /* We+neighbor may be a top boundary */
1710   }
1711   if (stag->neighbors[5] >= 0) {
1712     eprNeighbor[5] = stag->entriesPerElement * nNeighbors[5][0] + (nextToDummyEnd[0] ? entriesPerFace : 0);                                                /* Neighbor may be a right boundary */
1713     eplNeighbor[5] = eprNeighbor[5] * nNeighbors[5][1] + (dummyEnd[1] ? nNeighbors[5][0] * entriesPerFace + (nextToDummyEnd[0] ? entriesPerEdge : 0) : 0); /* We and neighbor may be a top boundary */
1714   }
1715   if (stag->neighbors[6] >= 0) {
1716     eprNeighbor[6] = stag->entriesPerElement * nNeighbors[6][0];
1717     eplNeighbor[6] = eprNeighbor[6] * nNeighbors[6][1] + (nextToDummyEnd[1] ? nNeighbors[6][0] * entriesPerFace : 0); /* Neighbor may be a top boundary */
1718   }
1719   if (stag->neighbors[7] >= 0) {
1720     eprNeighbor[7] = stag->entriesPerElement * nNeighbors[7][0] + (dummyEnd[0] ? entriesPerFace : 0);                                                      /* We+neighbor may be a right boundary */
1721     eplNeighbor[7] = eprNeighbor[7] * nNeighbors[7][1] + (nextToDummyEnd[1] ? nNeighbors[7][0] * entriesPerFace + (dummyEnd[0] ? entriesPerEdge : 0) : 0); /* Neighbor may be a top boundary */
1722   }
1723   if (stag->neighbors[8] >= 0) {
1724     eprNeighbor[8] = stag->entriesPerElement * nNeighbors[8][0] + (nextToDummyEnd[0] ? entriesPerFace : 0);                                                      /* Neighbor may be a right boundary */
1725     eplNeighbor[8] = eprNeighbor[8] * nNeighbors[8][1] + (nextToDummyEnd[1] ? nNeighbors[8][0] * entriesPerFace + (nextToDummyEnd[0] ? entriesPerEdge : 0) : 0); /* Neighbor may be a top boundary */
1726   }
1727   if (stag->neighbors[9] >= 0) {
1728     eprNeighbor[9] = stag->entriesPerElement * nNeighbors[9][0];
1729     eplNeighbor[9] = eprNeighbor[9] * nNeighbors[9][1];
1730   }
1731   if (stag->neighbors[10] >= 0) {
1732     eprNeighbor[10] = stag->entriesPerElement * nNeighbors[10][0] + (dummyEnd[0] ? entriesPerFace : 0); /* We+neighbor may be a right boundary */
1733     eplNeighbor[10] = eprNeighbor[10] * nNeighbors[10][1];
1734   }
1735   if (stag->neighbors[11] >= 0) {
1736     eprNeighbor[11] = stag->entriesPerElement * nNeighbors[11][0] + (nextToDummyEnd[0] ? entriesPerFace : 0); /* Neighbor may be a right boundary */
1737     eplNeighbor[11] = eprNeighbor[11] * nNeighbors[11][1];
1738   }
1739   if (stag->neighbors[12] >= 0) {
1740     eprNeighbor[12] = stag->entriesPerElement * nNeighbors[12][0];
1741     eplNeighbor[12] = eprNeighbor[12] * nNeighbors[12][1] + (dummyEnd[1] ? nNeighbors[12][0] * entriesPerFace : 0); /* We+neighbor may be a top boundary */
1742   }
1743   if (stag->neighbors[13] >= 0) {
1744     eprNeighbor[13] = stag->entriesPerElement * nNeighbors[13][0] + (dummyEnd[0] ? entriesPerFace : 0);                                                  /* We may be a right boundary */
1745     eplNeighbor[13] = eprNeighbor[13] * nNeighbors[13][1] + (dummyEnd[1] ? nNeighbors[13][0] * entriesPerFace + (dummyEnd[0] ? entriesPerEdge : 0) : 0); /* We may be a top boundary */
1746   }
1747   if (stag->neighbors[14] >= 0) {
1748     eprNeighbor[14] = stag->entriesPerElement * nNeighbors[14][0] + (nextToDummyEnd[0] ? entriesPerFace : 0);                                                  /* Neighbor may be a right boundary */
1749     eplNeighbor[14] = eprNeighbor[14] * nNeighbors[14][1] + (dummyEnd[1] ? nNeighbors[14][0] * entriesPerFace + (nextToDummyEnd[0] ? entriesPerEdge : 0) : 0); /* We and neighbor may be a top boundary */
1750   }
1751   if (stag->neighbors[15] >= 0) {
1752     eprNeighbor[15] = stag->entriesPerElement * nNeighbors[15][0];
1753     eplNeighbor[15] = eprNeighbor[15] * nNeighbors[15][1] + (nextToDummyEnd[1] ? nNeighbors[15][0] * entriesPerFace : 0); /* Neighbor may be a top boundary */
1754   }
1755   if (stag->neighbors[16] >= 0) {
1756     eprNeighbor[16] = stag->entriesPerElement * nNeighbors[16][0] + (dummyEnd[0] ? entriesPerFace : 0);                                                        /* We+neighbor may be a right boundary */
1757     eplNeighbor[16] = eprNeighbor[16] * nNeighbors[16][1] + (nextToDummyEnd[1] ? nNeighbors[16][0] * entriesPerFace + (dummyEnd[0] ? entriesPerEdge : 0) : 0); /* Neighbor may be a top boundary */
1758   }
1759   if (stag->neighbors[17] >= 0) {
1760     eprNeighbor[17] = stag->entriesPerElement * nNeighbors[17][0] + (nextToDummyEnd[0] ? entriesPerFace : 0);                                                        /* Neighbor may be a right boundary */
1761     eplNeighbor[17] = eprNeighbor[17] * nNeighbors[17][1] + (nextToDummyEnd[1] ? nNeighbors[17][0] * entriesPerFace + (nextToDummyEnd[0] ? entriesPerEdge : 0) : 0); /* Neighbor may be a top boundary */
1762   }
1763   if (stag->neighbors[18] >= 0) {
1764     eprNeighbor[18] = stag->entriesPerElement * nNeighbors[18][0];
1765     eplNeighbor[18] = eprNeighbor[18] * nNeighbors[18][1];
1766   }
1767   if (stag->neighbors[19] >= 0) {
1768     eprNeighbor[19] = stag->entriesPerElement * nNeighbors[19][0] + (dummyEnd[0] ? entriesPerFace : 0); /* We+neighbor may be a right boundary */
1769     eplNeighbor[19] = eprNeighbor[19] * nNeighbors[19][1];
1770   }
1771   if (stag->neighbors[20] >= 0) {
1772     eprNeighbor[20] = stag->entriesPerElement * nNeighbors[20][0] + (nextToDummyEnd[0] ? entriesPerFace : 0); /* Neighbor may be a right boundary */
1773     eplNeighbor[20] = eprNeighbor[20] * nNeighbors[20][1];
1774   }
1775   if (stag->neighbors[21] >= 0) {
1776     eprNeighbor[21] = stag->entriesPerElement * nNeighbors[21][0];
1777     eplNeighbor[21] = eprNeighbor[21] * nNeighbors[21][1] + (dummyEnd[1] ? nNeighbors[21][0] * entriesPerFace : 0); /* We+neighbor may be a top boundary */
1778   }
1779   if (stag->neighbors[22] >= 0) {
1780     eprNeighbor[22] = stag->entriesPerElement * nNeighbors[22][0] + (dummyEnd[0] ? entriesPerFace : 0);                                                  /* neighbor is a right boundary if we are*/
1781     eplNeighbor[22] = eprNeighbor[22] * nNeighbors[22][1] + (dummyEnd[1] ? nNeighbors[22][0] * entriesPerFace + (dummyEnd[0] ? entriesPerEdge : 0) : 0); /* May be a top boundary */
1782   }
1783   if (stag->neighbors[23] >= 0) {
1784     eprNeighbor[23] = stag->entriesPerElement * nNeighbors[23][0] + (nextToDummyEnd[0] ? entriesPerFace : 0);                                                  /* Neighbor may be a right boundary */
1785     eplNeighbor[23] = eprNeighbor[23] * nNeighbors[23][1] + (dummyEnd[1] ? nNeighbors[23][0] * entriesPerFace + (nextToDummyEnd[0] ? entriesPerEdge : 0) : 0); /* We and neighbor may be a top boundary */
1786   }
1787   if (stag->neighbors[24] >= 0) {
1788     eprNeighbor[24] = stag->entriesPerElement * nNeighbors[24][0];
1789     eplNeighbor[24] = eprNeighbor[24] * nNeighbors[24][1] + (nextToDummyEnd[1] ? nNeighbors[24][0] * entriesPerFace : 0); /* Neighbor may be a top boundary */
1790   }
1791   if (stag->neighbors[25] >= 0) {
1792     eprNeighbor[25] = stag->entriesPerElement * nNeighbors[25][0] + (dummyEnd[0] ? entriesPerFace : 0);                                                        /* We+neighbor may be a right boundary */
1793     eplNeighbor[25] = eprNeighbor[25] * nNeighbors[25][1] + (nextToDummyEnd[1] ? nNeighbors[25][0] * entriesPerFace + (dummyEnd[0] ? entriesPerEdge : 0) : 0); /* Neighbor may be a top boundary */
1794   }
1795   if (stag->neighbors[26] >= 0) {
1796     eprNeighbor[26] = stag->entriesPerElement * nNeighbors[26][0] + (nextToDummyEnd[0] ? entriesPerFace : 0);                                                        /* Neighbor may be a right boundary */
1797     eplNeighbor[26] = eprNeighbor[26] * nNeighbors[26][1] + (nextToDummyEnd[1] ? nNeighbors[26][0] * entriesPerFace + (nextToDummyEnd[0] ? entriesPerEdge : 0) : 0); /* Neighbor may be a top boundary */
1798   }
1799 
1800   /* Populate idxGlobalAll */
1801   PetscCall(PetscMalloc1(stag->entriesGhost, &idxGlobalAll));
1802   count = 0;
1803 
1804   /* Loop over layers 1/3 : Back */
1805   if (!dummyStart[2]) {
1806     for (kghost = 0; kghost < ghostOffsetStart[2]; ++kghost) {
1807       const PetscInt k = nNeighbors[4][2] - ghostOffsetStart[2] + kghost; /* Note: this is the same value for all neighbors in this layer (use neighbor 4 which will always exist if we're lookng at this layer) */
1808 
1809       /* Loop over rows 1/3: Down Back*/
1810       if (!star && !dummyStart[1]) {
1811         for (jghost = 0; jghost < ghostOffsetStart[1]; ++jghost) {
1812           const PetscInt j = nNeighbors[1][1] - ghostOffsetStart[1] + jghost; /* Note: this is the same value for all neighbors in this row (use neighbor 1, down back)*/
1813 
1814           /* Loop over columns 1/3: Left Back Down*/
1815           if (!dummyStart[0]) {
1816             const PetscInt neighbor = 0;
1817             for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
1818               const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
1819               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
1820             }
1821           } else {
1822             for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
1823               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
1824             }
1825           }
1826 
1827           /* Loop over columns 2/3: (Middle) Down Back */
1828           {
1829             const PetscInt neighbor = 1;
1830             for (ighost = ghostOffsetStart[0]; ighost < stag->nGhost[0] - ghostOffsetEnd[0]; ++ighost) {
1831               const PetscInt i = ighost - ghostOffsetStart[0];
1832               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
1833             }
1834           }
1835 
1836           /* Loop over columns 3/3: Right Down Back */
1837           if (!dummyEnd[0]) {
1838             const PetscInt neighbor = 2;
1839             PetscInt       i;
1840             for (i = 0; i < ghostOffsetEnd[0]; ++i) {
1841               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
1842             }
1843           } else {
1844             /* Partial dummy entries on (Middle) Down Back neighbor */
1845             const PetscInt neighbor = 1;
1846             PetscInt       i, dLocal;
1847             i = stag->n[0];
1848             for (d = 0, dLocal = 0; dLocal < stag->dof[0]; ++d, ++dLocal, ++count) { /* Vertex */
1849               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
1850             }
1851             for (; dLocal < stag->dof[0] + stag->dof[1]; ++dLocal, ++count) { /* Dummies on back down edge */
1852               idxGlobalAll[count] = -1;
1853             }
1854             for (; dLocal < stag->dof[0] + 2 * stag->dof[1]; ++d, ++dLocal, ++count) { /* Back left edge */
1855               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
1856             }
1857             for (; dLocal < stag->dof[0] + 2 * stag->dof[1] + stag->dof[2]; ++dLocal, ++count) { /* Dummies on back face */
1858               idxGlobalAll[count] = -1;
1859             }
1860             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + stag->dof[2]; ++d, ++dLocal, ++count) { /* Down left edge */
1861               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
1862             }
1863             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + 2 * stag->dof[2]; ++dLocal, ++count) { /* Dummies on down face */
1864               idxGlobalAll[count] = -1;
1865             }
1866             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + 3 * stag->dof[2]; ++d, ++dLocal, ++count) { /* Left face */
1867               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
1868             }
1869             for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on element */
1870               idxGlobalAll[count] = -1;
1871             }
1872             ++i;
1873             for (; i < stag->n[0] + ghostOffsetEnd[0]; ++i) {
1874               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
1875             }
1876           }
1877         }
1878       } else {
1879         for (jghost = 0; jghost < ghostOffsetStart[1]; ++jghost) {
1880           for (ighost = 0; ighost < stag->nGhost[0]; ++ighost) {
1881             for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
1882           }
1883         }
1884       }
1885 
1886       /* Loop over rows 2/3: (Middle) Back */
1887       {
1888         for (jghost = ghostOffsetStart[1]; jghost < stag->nGhost[1] - ghostOffsetEnd[1]; ++jghost) {
1889           const PetscInt j = jghost - ghostOffsetStart[1];
1890 
1891           /* Loop over columns 1/3: Left (Middle) Back */
1892           if (!star && !dummyStart[0]) {
1893             const PetscInt neighbor = 3;
1894             for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
1895               const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
1896               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
1897             }
1898           } else {
1899             for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
1900               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
1901             }
1902           }
1903 
1904           /* Loop over columns 2/3: (Middle) (Middle) Back */
1905           {
1906             const PetscInt neighbor = 4;
1907             for (ighost = ghostOffsetStart[0]; ighost < stag->nGhost[0] - ghostOffsetEnd[0]; ++ighost) {
1908               const PetscInt i = ighost - ghostOffsetStart[0];
1909               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
1910             }
1911           }
1912 
1913           /* Loop over columns 3/3: Right (Middle) Back */
1914           if (!star && !dummyEnd[0]) {
1915             const PetscInt neighbor = 5;
1916             PetscInt       i;
1917             for (i = 0; i < ghostOffsetEnd[0]; ++i) {
1918               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
1919             }
1920           } else if (dummyEnd[0]) {
1921             /* Partial dummy entries on (Middle) (Middle) Back rank */
1922             const PetscInt neighbor = 4;
1923             PetscInt       i, dLocal;
1924             i = stag->n[0];
1925             for (d = 0, dLocal = 0; dLocal < stag->dof[0]; ++d, ++dLocal, ++count) { /* Vertex */
1926               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
1927             }
1928             for (; dLocal < stag->dof[0] + stag->dof[1]; ++dLocal, ++count) { /* Dummies on back down edge */
1929               idxGlobalAll[count] = -1;
1930             }
1931             for (; dLocal < stag->dof[0] + 2 * stag->dof[1]; ++d, ++dLocal, ++count) { /* Back left edge */
1932               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
1933             }
1934             for (; dLocal < stag->dof[0] + 2 * stag->dof[1] + stag->dof[2]; ++dLocal, ++count) { /* Dummies on back face */
1935               idxGlobalAll[count] = -1;
1936             }
1937             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + stag->dof[2]; ++d, ++dLocal, ++count) { /* Down left edge */
1938               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
1939             }
1940             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + 2 * stag->dof[2]; ++dLocal, ++count) { /* Dummies on down face */
1941               idxGlobalAll[count] = -1;
1942             }
1943             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + 3 * stag->dof[2]; ++d, ++dLocal, ++count) { /* Left face */
1944               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
1945             }
1946             for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on element */
1947               idxGlobalAll[count] = -1;
1948             }
1949             ++i;
1950             for (; i < stag->n[0] + ghostOffsetEnd[0]; ++i) {
1951               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
1952             }
1953           } else {
1954             /* Right (Middle) Back dummies */
1955             PetscInt i;
1956             for (i = 0; i < ghostOffsetEnd[0]; ++i) {
1957               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
1958             }
1959           }
1960         }
1961       }
1962 
1963       /* Loop over rows 3/3: Up Back */
1964       if (!star && !dummyEnd[1]) {
1965         PetscInt j;
1966         for (j = 0; j < ghostOffsetEnd[1]; ++j) {
1967           /* Loop over columns 1/3: Left Up Back*/
1968           if (!dummyStart[0]) {
1969             const PetscInt neighbor = 6;
1970             for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
1971               const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
1972               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
1973             }
1974           } else {
1975             for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
1976               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
1977             }
1978           }
1979 
1980           /* Loop over columns 2/3: (Middle) Up Back*/
1981           {
1982             const PetscInt neighbor = 7;
1983             for (ighost = ghostOffsetStart[0]; ighost < stag->nGhost[0] - ghostOffsetEnd[0]; ++ighost) {
1984               const PetscInt i = ighost - ghostOffsetStart[0];
1985               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
1986             }
1987           }
1988 
1989           /* Loop over columns 3/3: Right Up Back*/
1990           if (!dummyEnd[0]) {
1991             const PetscInt neighbor = 8;
1992             PetscInt       i;
1993             for (i = 0; i < ghostOffsetEnd[0]; ++i) {
1994               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
1995             }
1996           } else {
1997             /* Partial dummies on (Middle) Up Back */
1998             const PetscInt neighbor = 7;
1999             PetscInt       i, dLocal;
2000             i = stag->n[0];
2001             for (d = 0, dLocal = 0; dLocal < stag->dof[0]; ++d, ++dLocal, ++count) { /* Vertex */
2002               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2003             }
2004             for (; dLocal < stag->dof[0] + stag->dof[1]; ++dLocal, ++count) { /* Dummies on back down edge */
2005               idxGlobalAll[count] = -1;
2006             }
2007             for (; dLocal < stag->dof[0] + 2 * stag->dof[1]; ++d, ++dLocal, ++count) { /* Back left edge */
2008               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2009             }
2010             for (; dLocal < stag->dof[0] + 2 * stag->dof[1] + stag->dof[2]; ++dLocal, ++count) { /* Dummies on back face */
2011               idxGlobalAll[count] = -1;
2012             }
2013             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + stag->dof[2]; ++d, ++dLocal, ++count) { /* Down left edge */
2014               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2015             }
2016             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + 2 * stag->dof[2]; ++dLocal, ++count) { /* Dummies on down face */
2017               idxGlobalAll[count] = -1;
2018             }
2019             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + 3 * stag->dof[2]; ++d, ++dLocal, ++count) { /* Left face */
2020               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2021             }
2022             for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on element */
2023               idxGlobalAll[count] = -1;
2024             }
2025             ++i;
2026             for (; i < stag->n[0] + ghostOffsetEnd[0]; ++i) {
2027               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2028             }
2029           }
2030         }
2031       } else if (dummyEnd[1]) {
2032         /* Up Back partial dummy row */
2033         PetscInt j = stag->n[1];
2034 
2035         if (!star && !dummyStart[0]) {
2036           /* Up Back partial dummy row: Loop over columns 1/3: Left Up Back, on Left (Middle) Back rank */
2037           const PetscInt neighbor = 3;
2038           for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
2039             const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
2040             PetscInt       dLocal;
2041             for (d = 0, dLocal = 0; dLocal < stag->dof[0] + stag->dof[1]; ++d, ++dLocal, ++count) {                                                  /* Vertex and back down edge */
2042               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * entriesPerFace + d; /* Note i increment by face */
2043             }
2044 
2045             for (; dLocal < stag->dof[0] + 2 * stag->dof[1] + stag->dof[2]; ++dLocal, ++count) { /* Dummies on back face and back left edge */
2046               idxGlobalAll[count] = -1;
2047             }
2048             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + 2 * stag->dof[2]; ++d, ++dLocal, ++count) {                                            /* Down left edge and down face */
2049               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * entriesPerFace + d; /* Note i increment by face */
2050             }
2051             for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies left face and element */
2052               idxGlobalAll[count] = -1;
2053             }
2054           }
2055         } else {
2056           for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
2057             for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2058           }
2059         }
2060 
2061         /* Up Back partial dummy row: Loop over columns 2/3: (Middle) Up Back, on (Middle) (Middle) Back rank */
2062         {
2063           const PetscInt neighbor = 4;
2064           for (ighost = ghostOffsetStart[0]; ighost < stag->nGhost[0] - ghostOffsetEnd[0]; ++ighost) {
2065             const PetscInt i = ighost - ghostOffsetStart[0];
2066             PetscInt       dLocal;
2067             for (d = 0, dLocal = 0; dLocal < stag->dof[0] + stag->dof[1]; ++d, ++dLocal, ++count) {                                                  /* Vertex and back down edge */
2068               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * entriesPerFace + d; /* Note i increment by face */
2069             }
2070             for (; dLocal < stag->dof[0] + 2 * stag->dof[1] + stag->dof[2]; ++dLocal, ++count) { /* Dummies on back face and back left edge */
2071               idxGlobalAll[count] = -1;
2072             }
2073             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + 2 * stag->dof[2]; ++d, ++dLocal, ++count) {                                            /* Down left edge and down face */
2074               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * entriesPerFace + d; /* Note i increment by face */
2075             }
2076             for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies left face and element */
2077               idxGlobalAll[count] = -1;
2078             }
2079           }
2080         }
2081 
2082         /* Up Back partial dummy row: Loop over columns 3/3: Right Up Back, on Right (Middle) Back rank */
2083         if (!star && !dummyEnd[0]) {
2084           const PetscInt neighbor = 5;
2085           PetscInt       i;
2086           for (i = 0; i < ghostOffsetEnd[0]; ++i) {
2087             PetscInt dLocal;
2088             for (d = 0, dLocal = 0; dLocal < stag->dof[0] + stag->dof[1]; ++d, ++dLocal, ++count) {                                                  /* Vertex and back down edge */
2089               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * entriesPerFace + d; /* Note i increment by face */
2090             }
2091             for (; dLocal < stag->dof[0] + 2 * stag->dof[1] + stag->dof[2]; ++dLocal, ++count) { /* Dummies on back face and back left edge */
2092               idxGlobalAll[count] = -1;
2093             }
2094             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + 2 * stag->dof[2]; ++d, ++dLocal, ++count) {                                            /* Down left edge and down face */
2095               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * entriesPerFace + d; /* Note i increment by face */
2096             }
2097             for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies left face and element */
2098               idxGlobalAll[count] = -1;
2099             }
2100           }
2101         } else if (dummyEnd[0]) {
2102           /* Up Right Back partial dummy element, on (Middle) (Middle) Back rank */
2103           const PetscInt neighbor = 4;
2104           PetscInt       i, dLocal;
2105           i = stag->n[0];
2106           for (d = 0, dLocal = 0; dLocal < stag->dof[0]; ++d, ++dLocal, ++count) {                                                                 /* Vertex */
2107             idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * entriesPerFace + d; /* Note i increment by face */
2108           }
2109           for (; dLocal < stag->dof[0] + 2 * stag->dof[1] + stag->dof[2]; ++dLocal, ++count) { /* Dummies on back down edge, back face and back left edge */
2110             idxGlobalAll[count] = -1;
2111           }
2112           for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + stag->dof[2]; ++d, ++dLocal, ++count) {                                                /* Down left edge */
2113             idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * entriesPerFace + d; /* Note i increment by face */
2114           }
2115           for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies down face, left face and element */
2116             idxGlobalAll[count] = -1;
2117           }
2118           ++i;
2119           for (; i < stag->n[0] + ghostOffsetEnd[0]; ++i) {
2120             for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2121           }
2122         } else {
2123           /* Up Right Back dummies */
2124           PetscInt i;
2125           for (i = 0; i < ghostOffsetEnd[0]; ++i) {
2126             for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2127           }
2128         }
2129         ++j;
2130         /* Up Back additional dummy rows */
2131         for (; j < stag->n[1] + ghostOffsetEnd[1]; ++j) {
2132           for (ighost = 0; ighost < stag->nGhost[0]; ++ighost) {
2133             for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2134           }
2135         }
2136       } else {
2137         /* Up Back dummy rows */
2138         PetscInt j;
2139         for (j = 0; j < ghostOffsetEnd[1]; ++j) {
2140           for (ighost = 0; ighost < stag->nGhost[0]; ++ighost) {
2141             for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2142           }
2143         }
2144       }
2145     }
2146   } else {
2147     for (kghost = 0; kghost < ghostOffsetStart[2]; ++kghost) {
2148       for (jghost = 0; jghost < stag->nGhost[1]; ++jghost) {
2149         for (ighost = 0; ighost < stag->nGhost[0]; ++ighost) {
2150           for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2151         }
2152       }
2153     }
2154   }
2155 
2156   /* Loop over layers 2/3 : (Middle)  */
2157   {
2158     for (kghost = ghostOffsetStart[2]; kghost < stag->nGhost[2] - ghostOffsetEnd[2]; ++kghost) {
2159       const PetscInt k = kghost - ghostOffsetStart[2];
2160 
2161       /* Loop over rows 1/3: Down (Middle) */
2162       if (!dummyStart[1]) {
2163         for (jghost = 0; jghost < ghostOffsetStart[1]; ++jghost) {
2164           const PetscInt j = nNeighbors[10][1] - ghostOffsetStart[1] + jghost; /* down neighbor (10) always exists */
2165 
2166           /* Loop over columns 1/3: Left Down (Middle) */
2167           if (!star && !dummyStart[0]) {
2168             const PetscInt neighbor = 9;
2169             for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
2170               const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
2171               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2172             }
2173           } else {
2174             for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
2175               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2176             }
2177           }
2178 
2179           /* Loop over columns 2/3: (Middle) Down (Middle) */
2180           {
2181             const PetscInt neighbor = 10;
2182             for (ighost = ghostOffsetStart[0]; ighost < stag->nGhost[0] - ghostOffsetEnd[0]; ++ighost) {
2183               const PetscInt i = ighost - ghostOffsetStart[0];
2184               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2185             }
2186           }
2187 
2188           /* Loop over columns 3/3: Right Down (Middle) */
2189           if (!star && !dummyEnd[0]) {
2190             const PetscInt neighbor = 11;
2191             PetscInt       i;
2192             for (i = 0; i < ghostOffsetEnd[0]; ++i) {
2193               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2194             }
2195           } else if (dummyEnd[0]) {
2196             /* Partial dummies on (Middle) Down (Middle) neighbor */
2197             const PetscInt neighbor = 10;
2198             PetscInt       i, dLocal;
2199             i = stag->n[0];
2200             for (d = 0, dLocal = 0; dLocal < stag->dof[0]; ++d, ++dLocal, ++count) { /* Vertex */
2201               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2202             }
2203             for (; dLocal < stag->dof[0] + stag->dof[1]; ++dLocal, ++count) { /* Dummies on back down edge */
2204               idxGlobalAll[count] = -1;
2205             }
2206             for (; dLocal < stag->dof[0] + 2 * stag->dof[1]; ++d, ++dLocal, ++count) { /* Back left edge */
2207               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2208             }
2209             for (; dLocal < stag->dof[0] + 2 * stag->dof[1] + stag->dof[2]; ++dLocal, ++count) { /* Dummies on back face */
2210               idxGlobalAll[count] = -1;
2211             }
2212             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + stag->dof[2]; ++d, ++dLocal, ++count) { /* Down left edge */
2213               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2214             }
2215             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + 2 * stag->dof[2]; ++dLocal, ++count) { /* Dummies on down face */
2216               idxGlobalAll[count] = -1;
2217             }
2218             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + 3 * stag->dof[2]; ++d, ++dLocal, ++count) { /* Left face */
2219               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2220             }
2221             for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on element */
2222               idxGlobalAll[count] = -1;
2223             }
2224             ++i;
2225             for (; i < stag->n[0] + ghostOffsetEnd[0]; ++i) {
2226               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2227             }
2228           } else {
2229             /* Right Down (Middle) dummies */
2230             PetscInt i;
2231             for (i = 0; i < ghostOffsetEnd[0]; ++i) {
2232               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2233             }
2234           }
2235         }
2236       } else {
2237         for (jghost = 0; jghost < ghostOffsetStart[1]; ++jghost) {
2238           for (ighost = 0; ighost < stag->nGhost[0]; ++ighost) {
2239             for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2240           }
2241         }
2242       }
2243 
2244       /* Loop over rows 2/3: (Middle) (Middle) */
2245       {
2246         for (jghost = ghostOffsetStart[1]; jghost < stag->nGhost[1] - ghostOffsetEnd[1]; ++jghost) {
2247           const PetscInt j = jghost - ghostOffsetStart[1];
2248 
2249           /* Loop over columns 1/3: Left (Middle) (Middle) */
2250           if (!dummyStart[0]) {
2251             const PetscInt neighbor = 12;
2252             for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
2253               const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
2254               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2255             }
2256           } else {
2257             for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
2258               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2259             }
2260           }
2261 
2262           /* Loop over columns 2/3: (Middle) (Middle) (Middle) aka (Here) */
2263           {
2264             const PetscInt neighbor = 13;
2265             for (ighost = ghostOffsetStart[0]; ighost < stag->nGhost[0] - ghostOffsetEnd[0]; ++ighost) {
2266               const PetscInt i = ighost - ghostOffsetStart[0];
2267               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2268             }
2269           }
2270 
2271           /* Loop over columns 3/3: Right (Middle) (Middle) */
2272           if (!dummyEnd[0]) {
2273             const PetscInt neighbor = 14;
2274             PetscInt       i;
2275             for (i = 0; i < ghostOffsetEnd[0]; ++i) {
2276               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2277             }
2278           } else {
2279             /* Partial dummies on this rank */
2280             const PetscInt neighbor = 13;
2281             PetscInt       i, dLocal;
2282             i = stag->n[0];
2283             for (d = 0, dLocal = 0; dLocal < stag->dof[0]; ++d, ++dLocal, ++count) { /* Vertex */
2284               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2285             }
2286             for (; dLocal < stag->dof[0] + stag->dof[1]; ++dLocal, ++count) { /* Dummies on back down edge */
2287               idxGlobalAll[count] = -1;
2288             }
2289             for (; dLocal < stag->dof[0] + 2 * stag->dof[1]; ++d, ++dLocal, ++count) { /* Back left edge */
2290               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2291             }
2292             for (; dLocal < stag->dof[0] + 2 * stag->dof[1] + stag->dof[2]; ++dLocal, ++count) { /* Dummies on back face */
2293               idxGlobalAll[count] = -1;
2294             }
2295             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + stag->dof[2]; ++d, ++dLocal, ++count) { /* Down left edge */
2296               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2297             }
2298             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + 2 * stag->dof[2]; ++dLocal, ++count) { /* Dummies on down face */
2299               idxGlobalAll[count] = -1;
2300             }
2301             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + 3 * stag->dof[2]; ++d, ++dLocal, ++count) { /* Left face */
2302               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2303             }
2304             for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on element */
2305               idxGlobalAll[count] = -1;
2306             }
2307             ++i;
2308             for (; i < stag->n[0] + ghostOffsetEnd[0]; ++i) {
2309               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2310             }
2311           }
2312         }
2313       }
2314 
2315       /* Loop over rows 3/3: Up (Middle) */
2316       if (!dummyEnd[1]) {
2317         PetscInt j;
2318         for (j = 0; j < ghostOffsetEnd[1]; ++j) {
2319           /* Loop over columns 1/3: Left Up (Middle) */
2320           if (!star && !dummyStart[0]) {
2321             const PetscInt neighbor = 15;
2322             for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
2323               const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
2324               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2325             }
2326           } else {
2327             for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
2328               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2329             }
2330           }
2331 
2332           /* Loop over columns 2/3: (Middle) Up (Middle) */
2333           {
2334             const PetscInt neighbor = 16;
2335             for (ighost = ghostOffsetStart[0]; ighost < stag->nGhost[0] - ghostOffsetEnd[0]; ++ighost) {
2336               const PetscInt i = ighost - ghostOffsetStart[0];
2337               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2338             }
2339           }
2340 
2341           /* Loop over columns 3/3: Right Up (Middle) */
2342           if (!star && !dummyEnd[0]) {
2343             const PetscInt neighbor = 17;
2344             PetscInt       i;
2345             for (i = 0; i < ghostOffsetEnd[0]; ++i) {
2346               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2347             }
2348           } else if (dummyEnd[0]) {
2349             /* Partial dummies on (Middle) Up (Middle) rank */
2350             const PetscInt neighbor = 16;
2351             PetscInt       i, dLocal;
2352             i = stag->n[0];
2353             for (d = 0, dLocal = 0; dLocal < stag->dof[0]; ++d, ++dLocal, ++count) { /* Vertex */
2354               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2355             }
2356             for (; dLocal < stag->dof[0] + stag->dof[1]; ++dLocal, ++count) { /* Dummies on back down edge */
2357               idxGlobalAll[count] = -1;
2358             }
2359             for (; dLocal < stag->dof[0] + 2 * stag->dof[1]; ++d, ++dLocal, ++count) { /* Back left edge */
2360               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2361             }
2362             for (; dLocal < stag->dof[0] + 2 * stag->dof[1] + stag->dof[2]; ++dLocal, ++count) { /* Dummies on back face */
2363               idxGlobalAll[count] = -1;
2364             }
2365             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + stag->dof[2]; ++d, ++dLocal, ++count) { /* Down left edge */
2366               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2367             }
2368             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + 2 * stag->dof[2]; ++dLocal, ++count) { /* Dummies on down face */
2369               idxGlobalAll[count] = -1;
2370             }
2371             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + 3 * stag->dof[2]; ++d, ++dLocal, ++count) { /* Left face */
2372               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2373             }
2374             for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on element */
2375               idxGlobalAll[count] = -1;
2376             }
2377             ++i;
2378             for (; i < stag->n[0] + ghostOffsetEnd[0]; ++i) {
2379               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2380             }
2381           } else {
2382             /* Right Up (Middle) dumies */
2383             PetscInt i;
2384             for (i = 0; i < ghostOffsetEnd[0]; ++i) {
2385               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2386             }
2387           }
2388         }
2389       } else {
2390         /* Up (Middle) partial dummy row */
2391         PetscInt j = stag->n[1];
2392 
2393         /* Up (Middle) partial dummy row: columns 1/3: Left Up (Middle), on Left (Middle) (Middle) rank */
2394         if (!dummyStart[0]) {
2395           const PetscInt neighbor = 12;
2396           for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
2397             const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
2398             PetscInt       dLocal;
2399             for (d = 0, dLocal = 0; dLocal < stag->dof[0] + stag->dof[1]; ++d, ++dLocal, ++count) {                                                  /* Vertex and back down edge */
2400               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * entriesPerFace + d; /* Note i increment by face */
2401             }
2402             for (; dLocal < stag->dof[0] + 2 * stag->dof[1] + stag->dof[2]; ++dLocal, ++count) { /* Dummies on back face and back left edge */
2403               idxGlobalAll[count] = -1;
2404             }
2405             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + 2 * stag->dof[2]; ++d, ++dLocal, ++count) {                                            /* Down left edge and down face */
2406               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * entriesPerFace + d; /* Note i increment by face */
2407             }
2408             for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies left face and element */
2409               idxGlobalAll[count] = -1;
2410             }
2411           }
2412         } else {
2413           for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
2414             for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2415           }
2416         }
2417 
2418         /* Up (Middle) partial dummy row: columns 2/3: (Middle) Up (Middle), on (Middle) (Middle) (Middle) = here rank */
2419         {
2420           const PetscInt neighbor = 13;
2421           for (ighost = ghostOffsetStart[0]; ighost < stag->nGhost[0] - ghostOffsetEnd[0]; ++ighost) {
2422             const PetscInt i = ighost - ghostOffsetStart[0];
2423             PetscInt       dLocal;
2424             for (d = 0, dLocal = 0; dLocal < stag->dof[0] + stag->dof[1]; ++d, ++dLocal, ++count) {                                                  /* Vertex and back down edge */
2425               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * entriesPerFace + d; /* Note i increment by face */
2426             }
2427             for (; dLocal < stag->dof[0] + 2 * stag->dof[1] + stag->dof[2]; ++dLocal, ++count) { /* Dummies on back face and back left edge */
2428               idxGlobalAll[count] = -1;
2429             }
2430             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + 2 * stag->dof[2]; ++d, ++dLocal, ++count) {                                            /* Down left edge and down face */
2431               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * entriesPerFace + d; /* Note i increment by face */
2432             }
2433             for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies left face and element */
2434               idxGlobalAll[count] = -1;
2435             }
2436           }
2437         }
2438 
2439         if (!dummyEnd[0]) {
2440           /* Up (Middle) partial dummy row: columns 3/3: Right Up (Middle), on Right (Middle) (Middle) rank */
2441           const PetscInt neighbor = 14;
2442           PetscInt       i;
2443           for (i = 0; i < ghostOffsetEnd[0]; ++i) {
2444             PetscInt dLocal;
2445             for (d = 0, dLocal = 0; dLocal < stag->dof[0] + stag->dof[1]; ++d, ++dLocal, ++count) {                                                  /* Vertex and back down edge */
2446               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * entriesPerFace + d; /* Note i increment by face */
2447             }
2448             for (; dLocal < stag->dof[0] + 2 * stag->dof[1] + stag->dof[2]; ++dLocal, ++count) { /* Dummies on back face and back left edge */
2449               idxGlobalAll[count] = -1;
2450             }
2451             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + 2 * stag->dof[2]; ++d, ++dLocal, ++count) {                                            /* Down left edge and down face */
2452               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * entriesPerFace + d; /* Note i increment by face */
2453             }
2454             for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies left face and element */
2455               idxGlobalAll[count] = -1;
2456             }
2457           }
2458         } else {
2459           /* Up (Middle) Right partial dummy element, on (Middle) (Middle) (Middle) = here rank */
2460           const PetscInt neighbor = 13;
2461           PetscInt       i, dLocal;
2462           i = stag->n[0];
2463           for (d = 0, dLocal = 0; dLocal < stag->dof[0]; ++d, ++dLocal, ++count) {                                                                 /* Vertex */
2464             idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * entriesPerFace + d; /* Note i increment by face */
2465           }
2466           for (; dLocal < stag->dof[0] + 2 * stag->dof[1] + stag->dof[2]; ++dLocal, ++count) { /* Dummies on back down edge, back face and back left edge */
2467             idxGlobalAll[count] = -1;
2468           }
2469           for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + stag->dof[2]; ++d, ++dLocal, ++count) {                                                /* Down left edge */
2470             idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * entriesPerFace + d; /* Note i increment by face */
2471           }
2472           for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies down face, left face and element */
2473             idxGlobalAll[count] = -1;
2474           }
2475           ++i;
2476           for (; i < stag->n[0] + ghostOffsetEnd[0]; ++i) {
2477             for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2478           }
2479         }
2480         ++j;
2481         /* Up (Middle) additional dummy rows */
2482         for (; j < stag->n[1] + ghostOffsetEnd[1]; ++j) {
2483           for (ighost = 0; ighost < stag->nGhost[0]; ++ighost) {
2484             for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2485           }
2486         }
2487       }
2488     }
2489   }
2490 
2491   /* Loop over layers 3/3 : Front */
2492   if (!dummyEnd[2]) {
2493     PetscInt k;
2494     for (k = 0; k < ghostOffsetEnd[2]; ++k) {
2495       /* Loop over rows 1/3: Down Front */
2496       if (!star && !dummyStart[1]) {
2497         for (jghost = 0; jghost < ghostOffsetStart[1]; ++jghost) {
2498           const PetscInt j = nNeighbors[19][1] - ghostOffsetStart[1] + jghost; /* Constant for whole row (use down front neighbor) */
2499 
2500           /* Loop over columns 1/3: Left Down Front */
2501           if (!dummyStart[0]) {
2502             const PetscInt neighbor = 18;
2503             for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
2504               const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
2505               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2506             }
2507           } else {
2508             for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
2509               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2510             }
2511           }
2512 
2513           /* Loop over columns 2/3: (Middle) Down Front */
2514           {
2515             const PetscInt neighbor = 19;
2516             for (ighost = ghostOffsetStart[0]; ighost < stag->nGhost[0] - ghostOffsetEnd[0]; ++ighost) {
2517               const PetscInt i = ighost - ghostOffsetStart[0];
2518               for (d = 0; d < stag->entriesPerElement; ++d, ++count) { /* vertex, 2 edges, and face associated with back face */
2519                 idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2520               }
2521             }
2522           }
2523 
2524           /* Loop over columns 3/3: Right Down Front */
2525           if (!dummyEnd[0]) {
2526             const PetscInt neighbor = 20;
2527             PetscInt       i;
2528             for (i = 0; i < ghostOffsetEnd[0]; ++i) {
2529               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2530             }
2531           } else {
2532             /* Partial dummy element on (Middle) Down Front rank */
2533             const PetscInt neighbor = 19;
2534             PetscInt       i, dLocal;
2535             i = stag->n[0];
2536             for (d = 0, dLocal = 0; dLocal < stag->dof[0]; ++d, ++dLocal, ++count) { /* Vertex */
2537               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2538             }
2539             for (; dLocal < stag->dof[0] + stag->dof[1]; ++dLocal, ++count) { /* Dummies on back down edge */
2540               idxGlobalAll[count] = -1;
2541             }
2542             for (; dLocal < stag->dof[0] + 2 * stag->dof[1]; ++d, ++dLocal, ++count) { /* Back left edge */
2543               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2544             }
2545             for (; dLocal < stag->dof[0] + 2 * stag->dof[1] + stag->dof[2]; ++dLocal, ++count) { /* Dummies on back face */
2546               idxGlobalAll[count] = -1;
2547             }
2548             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + stag->dof[2]; ++d, ++dLocal, ++count) { /* Down left edge */
2549               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2550             }
2551             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + 2 * stag->dof[2]; ++dLocal, ++count) { /* Dummies on down face */
2552               idxGlobalAll[count] = -1;
2553             }
2554             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + 3 * stag->dof[2]; ++d, ++dLocal, ++count) { /* Left face */
2555               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2556             }
2557             for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on element */
2558               idxGlobalAll[count] = -1;
2559             }
2560             ++i;
2561             for (; i < stag->n[0] + ghostOffsetEnd[0]; ++i) {
2562               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2563             }
2564           }
2565         }
2566       } else {
2567         for (jghost = 0; jghost < ghostOffsetStart[1]; ++jghost) {
2568           for (ighost = 0; ighost < stag->nGhost[0]; ++ighost) {
2569             for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2570           }
2571         }
2572       }
2573 
2574       /* Loop over rows 2/3: (Middle) Front */
2575       {
2576         for (jghost = ghostOffsetStart[1]; jghost < stag->nGhost[1] - ghostOffsetEnd[1]; ++jghost) {
2577           const PetscInt j = jghost - ghostOffsetStart[1];
2578 
2579           /* Loop over columns 1/3: Left (Middle) Front*/
2580           if (!star && !dummyStart[0]) {
2581             const PetscInt neighbor = 21;
2582             for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
2583               const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
2584               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2585             }
2586           } else {
2587             for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
2588               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2589             }
2590           }
2591 
2592           /* Loop over columns 2/3: (Middle) (Middle) Front*/
2593           {
2594             const PetscInt neighbor = 22;
2595             for (ighost = ghostOffsetStart[0]; ighost < stag->nGhost[0] - ghostOffsetEnd[0]; ++ighost) {
2596               const PetscInt i = ighost - ghostOffsetStart[0];
2597               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2598             }
2599           }
2600 
2601           /* Loop over columns 3/3: Right (Middle) Front*/
2602           if (!star && !dummyEnd[0]) {
2603             const PetscInt neighbor = 23;
2604             PetscInt       i;
2605             for (i = 0; i < ghostOffsetEnd[0]; ++i) {
2606               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2607             }
2608           } else if (dummyEnd[0]) {
2609             /* Partial dummy element on (Middle) (Middle) Front element */
2610             const PetscInt neighbor = 22;
2611             PetscInt       i, dLocal;
2612             i = stag->n[0];
2613             for (d = 0, dLocal = 0; dLocal < stag->dof[0]; ++d, ++dLocal, ++count) { /* Vertex */
2614               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2615             }
2616             for (; dLocal < stag->dof[0] + stag->dof[1]; ++dLocal, ++count) { /* Dummies on back down edge */
2617               idxGlobalAll[count] = -1;
2618             }
2619             for (; dLocal < stag->dof[0] + 2 * stag->dof[1]; ++d, ++dLocal, ++count) { /* Back left edge */
2620               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2621             }
2622             for (; dLocal < stag->dof[0] + 2 * stag->dof[1] + stag->dof[2]; ++dLocal, ++count) { /* Dummies on back face */
2623               idxGlobalAll[count] = -1;
2624             }
2625             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + stag->dof[2]; ++d, ++dLocal, ++count) { /* Down left edge */
2626               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2627             }
2628             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + 2 * stag->dof[2]; ++dLocal, ++count) { /* Dummies on down face */
2629               idxGlobalAll[count] = -1;
2630             }
2631             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + 3 * stag->dof[2]; ++d, ++dLocal, ++count) { /* Left face */
2632               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2633             }
2634             for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on element */
2635               idxGlobalAll[count] = -1;
2636             }
2637             ++i;
2638             for (; i < stag->n[0] + ghostOffsetEnd[0]; ++i) {
2639               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2640             }
2641           } else {
2642             /* Right (Middle) Front dummies */
2643             PetscInt i;
2644             for (i = 0; i < ghostOffsetEnd[0]; ++i) {
2645               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2646             }
2647           }
2648         }
2649       }
2650 
2651       /* Loop over rows 3/3: Up Front */
2652       if (!star && !dummyEnd[1]) {
2653         PetscInt j;
2654         for (j = 0; j < ghostOffsetEnd[1]; ++j) {
2655           /* Loop over columns 1/3: Left Up Front */
2656           if (!dummyStart[0]) {
2657             const PetscInt neighbor = 24;
2658             for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
2659               const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
2660               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2661             }
2662           } else {
2663             for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
2664               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2665             }
2666           }
2667 
2668           /* Loop over columns 2/3: (Middle) Up Front */
2669           {
2670             const PetscInt neighbor = 25;
2671             for (ighost = ghostOffsetStart[0]; ighost < stag->nGhost[0] - ghostOffsetEnd[0]; ++ighost) {
2672               const PetscInt i = ighost - ghostOffsetStart[0];
2673               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2674             }
2675           }
2676 
2677           /* Loop over columns 3/3: Right Up Front */
2678           if (!dummyEnd[0]) {
2679             const PetscInt neighbor = 26;
2680             PetscInt       i;
2681             for (i = 0; i < ghostOffsetEnd[0]; ++i) {
2682               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2683             }
2684           } else {
2685             /* Partial dummy element on (Middle) Up Front rank */
2686             const PetscInt neighbor = 25;
2687             PetscInt       i, dLocal;
2688             i = stag->n[0];
2689             for (d = 0, dLocal = 0; dLocal < stag->dof[0]; ++d, ++dLocal, ++count) { /* Vertex */
2690               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2691             }
2692             for (; dLocal < stag->dof[0] + stag->dof[1]; ++dLocal, ++count) { /* Dummies on back down edge */
2693               idxGlobalAll[count] = -1;
2694             }
2695             for (; dLocal < stag->dof[0] + 2 * stag->dof[1]; ++d, ++dLocal, ++count) { /* Back left edge */
2696               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2697             }
2698             for (; dLocal < stag->dof[0] + 2 * stag->dof[1] + stag->dof[2]; ++dLocal, ++count) { /* Dummies on back face */
2699               idxGlobalAll[count] = -1;
2700             }
2701             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + stag->dof[2]; ++d, ++dLocal, ++count) { /* Down left edge */
2702               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2703             }
2704             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + 2 * stag->dof[2]; ++dLocal, ++count) { /* Dummies on down face */
2705               idxGlobalAll[count] = -1;
2706             }
2707             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + 3 * stag->dof[2]; ++d, ++dLocal, ++count) { /* Left face */
2708               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * stag->entriesPerElement + d;
2709             }
2710             for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on element */
2711               idxGlobalAll[count] = -1;
2712             }
2713             ++i;
2714             for (; i < stag->n[0] + ghostOffsetEnd[0]; ++i) {
2715               for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2716             }
2717           }
2718         }
2719       } else if (dummyEnd[1]) {
2720         /* Up Front partial dummy row */
2721         PetscInt j = stag->n[1];
2722 
2723         /* Up Front partial dummy row: columns 1/3: Left Up Front, on Left (Middle) Front rank */
2724         if (!star && !dummyStart[0]) {
2725           const PetscInt neighbor = 21;
2726           for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
2727             const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
2728             PetscInt       dLocal;
2729             for (d = 0, dLocal = 0; dLocal < stag->dof[0] + stag->dof[1]; ++d, ++dLocal, ++count) {                                                  /* Vertex and back down edge */
2730               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * entriesPerFace + d; /* Note i increment by face */
2731             }
2732             for (; dLocal < stag->dof[0] + 2 * stag->dof[1] + stag->dof[2]; ++dLocal, ++count) { /* Dummies on back face and back left edge */
2733               idxGlobalAll[count] = -1;
2734             }
2735             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + 2 * stag->dof[2]; ++d, ++dLocal, ++count) {                                            /* Down left edge and down face */
2736               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * entriesPerFace + d; /* Note i increment by face */
2737             }
2738             for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies left face and element */
2739               idxGlobalAll[count] = -1;
2740             }
2741           }
2742         } else {
2743           for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
2744             for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2745           }
2746         }
2747 
2748         /* Up Front partial dummy row: columns 2/3: (Middle) Up Front, on (Middle) (Middle) Front rank */
2749         {
2750           const PetscInt neighbor = 22;
2751           for (ighost = ghostOffsetStart[0]; ighost < stag->nGhost[0] - ghostOffsetEnd[0]; ++ighost) {
2752             const PetscInt i = ighost - ghostOffsetStart[0];
2753             PetscInt       dLocal;
2754             for (d = 0, dLocal = 0; dLocal < stag->dof[0] + stag->dof[1]; ++d, ++dLocal, ++count) {                                                  /* Vertex and back down edge */
2755               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * entriesPerFace + d; /* Note i increment by face */
2756             }
2757             for (; dLocal < stag->dof[0] + 2 * stag->dof[1] + stag->dof[2]; ++dLocal, ++count) { /* Dummies on back face and back left edge */
2758               idxGlobalAll[count] = -1;
2759             }
2760             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + 2 * stag->dof[2]; ++d, ++dLocal, ++count) {                                            /* Down left edge and down face */
2761               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * entriesPerFace + d; /* Note i increment by face */
2762             }
2763             for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies left face and element */
2764               idxGlobalAll[count] = -1;
2765             }
2766           }
2767         }
2768 
2769         if (!star && !dummyEnd[0]) {
2770           /* Up Front partial dummy row: columns 3/3: Right Up Front, on Right (Middle) Front rank */
2771           const PetscInt neighbor = 23;
2772           PetscInt       i;
2773           for (i = 0; i < ghostOffsetEnd[0]; ++i) {
2774             PetscInt dLocal;
2775             for (d = 0, dLocal = 0; dLocal < stag->dof[0] + stag->dof[1]; ++d, ++dLocal, ++count) {                                                  /* Vertex and back down edge */
2776               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * entriesPerFace + d; /* Note i increment by face */
2777             }
2778             for (; dLocal < stag->dof[0] + 2 * stag->dof[1] + stag->dof[2]; ++dLocal, ++count) { /* Dummies on back face and back left edge */
2779               idxGlobalAll[count] = -1;
2780             }
2781             for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + 2 * stag->dof[2]; ++d, ++dLocal, ++count) {                                            /* Down left edge and down face */
2782               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * entriesPerFace + d; /* Note i increment by face */
2783             }
2784             for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies left face and element */
2785               idxGlobalAll[count] = -1;
2786             }
2787           }
2788 
2789         } else if (dummyEnd[0]) {
2790           /* Partial Right Up Front dummy element, on (Middle) (Middle) Front rank */
2791           const PetscInt neighbor = 22;
2792           PetscInt       i, dLocal;
2793           i = stag->n[0];
2794           for (d = 0, dLocal = 0; dLocal < stag->dof[0]; ++d, ++dLocal, ++count) {                                                                 /* Vertex */
2795             idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * entriesPerFace + d; /* Note i increment by face */
2796           }
2797           for (; dLocal < stag->dof[0] + 2 * stag->dof[1] + stag->dof[2]; ++dLocal, ++count) { /* Dummies on back down edge, back face and back left edge */
2798             idxGlobalAll[count] = -1;
2799           }
2800           for (; dLocal < stag->dof[0] + 3 * stag->dof[1] + stag->dof[2]; ++d, ++dLocal, ++count) {                                                /* Down left edge */
2801             idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * eprNeighbor[neighbor] + i * entriesPerFace + d; /* Note i increment by face */
2802           }
2803           for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies down face, left face and element */
2804             idxGlobalAll[count] = -1;
2805           }
2806           ++i;
2807           for (; i < stag->n[0] + ghostOffsetEnd[0]; ++i) {
2808             for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2809           }
2810         } else {
2811           /* Right Up Front dummies */
2812           PetscInt i;
2813           for (i = 0; i < ghostOffsetEnd[0]; ++i) {
2814             for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2815           }
2816         }
2817         ++j;
2818         /* Up Front additional dummy rows */
2819         for (; j < stag->n[1] + ghostOffsetEnd[1]; ++j) {
2820           for (ighost = 0; ighost < stag->nGhost[0]; ++ighost) {
2821             for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2822           }
2823         }
2824       } else {
2825         /* Up Front dummies */
2826         PetscInt j;
2827         for (j = 0; j < ghostOffsetEnd[1]; ++j) {
2828           for (ighost = 0; ighost < stag->nGhost[0]; ++ighost) {
2829             for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2830           }
2831         }
2832       }
2833     }
2834   } else {
2835     PetscInt k = stag->n[2];
2836 
2837     /* Front partial ghost layer: rows 1/3: Down Front, on Down (Middle) */
2838     if (!dummyStart[1]) {
2839       for (jghost = 0; jghost < ghostOffsetStart[1]; ++jghost) {
2840         const PetscInt j = nNeighbors[10][1] - ghostOffsetStart[1] + jghost; /* wrt down neighbor (10) */
2841 
2842         /* Down Front partial ghost row: columns 1/3: Left Down Front, on  Left Down (Middle) */
2843         if (!star && !dummyStart[0]) {
2844           const PetscInt neighbor  = 9;
2845           const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0]; /* Note that we can't be a right boundary */
2846           for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
2847             const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
2848             PetscInt       dLocal;
2849             for (d = 0, dLocal = 0; dLocal < stag->dof[0] + 2 * stag->dof[1] + stag->dof[2]; ++d, ++dLocal, ++count) {                   /* Vertex, back down edge, back face, back left edge */
2850               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * epFaceRow + i * entriesPerFace + d; /* Note j increment by face */
2851             }
2852             for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining points */
2853               idxGlobalAll[count] = -1;
2854             }
2855           }
2856         } else {
2857           for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
2858             for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2859           }
2860         }
2861 
2862         /* Down Front partial ghost row: columns 2/3: (Middle) Down Front, on (Middle) Down (Middle) */
2863         {
2864           const PetscInt neighbor  = 10;
2865           const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerEdge : 0); /* We+neighbor may be a right boundary */
2866           for (ighost = ghostOffsetStart[0]; ighost < stag->nGhost[0] - ghostOffsetEnd[0]; ++ighost) {
2867             const PetscInt i = ighost - ghostOffsetStart[0];
2868             PetscInt       dLocal;
2869             for (d = 0, dLocal = 0; dLocal < stag->dof[0] + 2 * stag->dof[1] + stag->dof[2]; ++d, ++dLocal, ++count) {                   /* Vertex, back down edge, back face, back left edge */
2870               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * epFaceRow + i * entriesPerFace + d; /* Note j increment by face */
2871             }
2872             for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining points */
2873               idxGlobalAll[count] = -1;
2874             }
2875           }
2876         }
2877 
2878         if (!star && !dummyEnd[0]) {
2879           /* Down Front partial dummy row: columns 3/3: Right Down Front, on Right Down (Middle) */
2880           const PetscInt neighbor  = 11;
2881           const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerEdge : 0); /* Neighbor may be a right boundary */
2882           PetscInt       i;
2883           for (i = 0; i < ghostOffsetEnd[0]; ++i) {
2884             PetscInt dLocal;
2885             for (d = 0, dLocal = 0; dLocal < stag->dof[0] + 2 * stag->dof[1] + stag->dof[2]; ++d, ++dLocal, ++count) {                   /* Vertex, back down edge, back face, back left edge */
2886               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * epFaceRow + i * entriesPerFace + d; /* Note j increment by face */
2887             }
2888             for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining points */
2889               idxGlobalAll[count] = -1;
2890             }
2891           }
2892         } else if (dummyEnd[0]) {
2893           /* Right Down Front partial dummy element, living on (Middle) Down (Middle) rank */
2894           const PetscInt neighbor  = 10;
2895           const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerEdge : 0); /* We+neighbor may be a right boundary */
2896           PetscInt       i, dLocal;
2897           i = stag->n[0];
2898           for (d = 0, dLocal = 0; dLocal < stag->dof[0]; ++d, ++dLocal, ++count) {                                                     /* Vertex */
2899             idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * epFaceRow + i * entriesPerFace + d; /* Note i increment by face */
2900           }
2901           for (; dLocal < stag->dof[0] + stag->dof[1]; ++dLocal, ++count) { /* Dummies on back down edge */
2902             idxGlobalAll[count] = -1;
2903           }
2904           for (; dLocal < stag->dof[0] + 2 * stag->dof[1]; ++d, ++dLocal, ++count) {                                                   /* left back edge */
2905             idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * epFaceRow + i * entriesPerFace + d; /* Note i increment by face */
2906           }
2907           for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining */
2908             idxGlobalAll[count] = -1;
2909           }
2910           ++i;
2911           for (; i < stag->n[0] + ghostOffsetEnd[0]; ++i) {
2912             for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2913           }
2914         } else {
2915           PetscInt i;
2916           /* Right Down Front dummies */
2917           for (i = 0; i < ghostOffsetEnd[0]; ++i) {
2918             for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2919           }
2920         }
2921       }
2922     } else {
2923       for (jghost = 0; jghost < ghostOffsetStart[1]; ++jghost) {
2924         for (ighost = 0; ighost < stag->nGhost[0]; ++ighost) {
2925           for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2926         }
2927       }
2928     }
2929 
2930     /* Front partial ghost layer: rows 2/3: (Middle) Front, on (Middle) (Middle) */
2931     {
2932       for (jghost = ghostOffsetStart[1]; jghost < stag->nGhost[1] - ghostOffsetEnd[1]; ++jghost) {
2933         const PetscInt j = jghost - ghostOffsetStart[1];
2934 
2935         /* (Middle) Front partial dummy row: columns 1/3: Left (Middle) Front, on Left (Middle) (Middle) */
2936         if (!dummyStart[0]) {
2937           const PetscInt neighbor  = 12;
2938           const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0];
2939           for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
2940             const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
2941             PetscInt       dLocal;
2942             for (d = 0, dLocal = 0; dLocal < stag->dof[0] + 2 * stag->dof[1] + stag->dof[2]; ++d, ++dLocal, ++count) {                   /* Vertex, back down edge, back face, back left edge */
2943               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * epFaceRow + i * entriesPerFace + d; /* Note j increment by face */
2944             }
2945             for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining points */
2946               idxGlobalAll[count] = -1;
2947             }
2948           }
2949         } else {
2950           for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
2951             for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
2952           }
2953         }
2954 
2955         /* (Middle) Front partial dummy row: columns 2/3: (Middle) (Middle) Front, on (Middle) (Middle) (Middle) */
2956         {
2957           const PetscInt neighbor  = 13;
2958           const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerEdge : 0); /* We may be a right boundary */
2959           for (ighost = ghostOffsetStart[0]; ighost < stag->nGhost[0] - ghostOffsetEnd[0]; ++ighost) {
2960             const PetscInt i = ighost - ghostOffsetStart[0];
2961             PetscInt       dLocal;
2962             for (d = 0, dLocal = 0; dLocal < stag->dof[0] + 2 * stag->dof[1] + stag->dof[2]; ++d, ++dLocal, ++count) {                   /* Vertex, back down edge, back face, back left edge */
2963               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * epFaceRow + i * entriesPerFace + d; /* Note j increment by face */
2964             }
2965             for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining points */
2966               idxGlobalAll[count] = -1;
2967             }
2968           }
2969         }
2970 
2971         if (!dummyEnd[0]) {
2972           /* (Middle) Front partial dummy row: columns 3/3: Right (Middle) Front, on Right (Middle) (Middle) */
2973           const PetscInt neighbor  = 14;
2974           const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerEdge : 0); /* Neighbor may be a right boundary */
2975           PetscInt       i;
2976           for (i = 0; i < ghostOffsetEnd[0]; ++i) {
2977             PetscInt dLocal;
2978             for (d = 0, dLocal = 0; dLocal < stag->dof[0] + 2 * stag->dof[1] + stag->dof[2]; ++d, ++dLocal, ++count) {                   /* Vertex, back down edge, back face, back left edge */
2979               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * epFaceRow + i * entriesPerFace + d; /* Note j increment by face */
2980             }
2981             for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining points */
2982               idxGlobalAll[count] = -1;
2983             }
2984           }
2985         } else {
2986           /* Right (Middle) Front partial dummy element, on (Middle) (Middle) (Middle) rank */
2987           const PetscInt neighbor  = 13;
2988           const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerEdge : 0); /* We may be a right boundary */
2989           PetscInt       i, dLocal;
2990           i = stag->n[0];
2991           for (d = 0, dLocal = 0; dLocal < stag->dof[0]; ++d, ++dLocal, ++count) {                                                     /* Vertex */
2992             idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * epFaceRow + i * entriesPerFace + d; /* Note i increment by face */
2993           }
2994           for (; dLocal < stag->dof[0] + stag->dof[1]; ++dLocal, ++count) { /* Dummies on back down edge */
2995             idxGlobalAll[count] = -1;
2996           }
2997           for (; dLocal < stag->dof[0] + 2 * stag->dof[1]; ++d, ++dLocal, ++count) {                                                   /* left back edge */
2998             idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * epFaceRow + i * entriesPerFace + d; /* Note i increment by face */
2999           }
3000           for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining */
3001             idxGlobalAll[count] = -1;
3002           }
3003           ++i;
3004           for (; i < stag->n[0] + ghostOffsetEnd[0]; ++i) {
3005             for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
3006           }
3007         }
3008       }
3009     }
3010 
3011     /* Front partial ghost layer: rows 3/3: Up Front, on Up (Middle) */
3012     if (!dummyEnd[1]) {
3013       PetscInt j;
3014       for (j = 0; j < ghostOffsetEnd[1]; ++j) {
3015         /* Up Front partial dummy row: columns 1/3: Left Up Front, on Left Up (Middle) */
3016         if (!star && !dummyStart[0]) {
3017           const PetscInt neighbor  = 15;
3018           const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0];
3019           for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
3020             const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
3021             PetscInt       dLocal;
3022             for (d = 0, dLocal = 0; dLocal < stag->dof[0] + 2 * stag->dof[1] + stag->dof[2]; ++d, ++dLocal, ++count) {                   /* Vertex, back down edge, back face, back left edge */
3023               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * epFaceRow + i * entriesPerFace + d; /* Note j increment by face */
3024             }
3025             for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining points */
3026               idxGlobalAll[count] = -1;
3027             }
3028           }
3029         } else {
3030           for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
3031             for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
3032           }
3033         }
3034 
3035         /* Up Front partial dummy row: columns 2/3: (Middle) Up Front, on (Middle) Up (Middle) */
3036         {
3037           const PetscInt neighbor  = 16;
3038           const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerEdge : 0); /* We+neighbor may be a right boundary */
3039           for (ighost = ghostOffsetStart[0]; ighost < stag->nGhost[0] - ghostOffsetEnd[0]; ++ighost) {
3040             const PetscInt i = ighost - ghostOffsetStart[0];
3041             PetscInt       dLocal;
3042             for (d = 0, dLocal = 0; dLocal < stag->dof[0] + 2 * stag->dof[1] + stag->dof[2]; ++d, ++dLocal, ++count) {                   /* Vertex, back down edge, back face, back left edge */
3043               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * epFaceRow + i * entriesPerFace + d; /* Note j increment by face */
3044             }
3045             for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining points */
3046               idxGlobalAll[count] = -1;
3047             }
3048           }
3049         }
3050 
3051         if (!star && !dummyEnd[0]) {
3052           /* Up Front partial dummy row: columns 3/3: Right Up Front, on Right Up (Middle) */
3053           const PetscInt neighbor  = 17;
3054           const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerEdge : 0); /* Neighbor may be a right boundary */
3055           PetscInt       i;
3056           for (i = 0; i < ghostOffsetEnd[0]; ++i) {
3057             PetscInt dLocal;
3058             for (d = 0, dLocal = 0; dLocal < stag->dof[0] + 2 * stag->dof[1] + stag->dof[2]; ++d, ++dLocal, ++count) {                   /* Vertex, back down edge, back face, back left edge */
3059               idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * epFaceRow + i * entriesPerFace + d; /* Note j increment by face */
3060             }
3061             for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining points */
3062               idxGlobalAll[count] = -1;
3063             }
3064           }
3065         } else if (dummyEnd[0]) {
3066           /* Right Up Front partial dummy element, on (Middle) Up (Middle) */
3067           const PetscInt neighbor  = 16;
3068           const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerEdge : 0); /* We+neighbor may be a right boundary */
3069           PetscInt       i, dLocal;
3070           i = stag->n[0];
3071           for (d = 0, dLocal = 0; dLocal < stag->dof[0]; ++d, ++dLocal, ++count) {                                                     /* Vertex */
3072             idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * epFaceRow + i * entriesPerFace + d; /* Note i increment by face */
3073           }
3074           for (; dLocal < stag->dof[0] + stag->dof[1]; ++dLocal, ++count) { /* Dummies on back down edge */
3075             idxGlobalAll[count] = -1;
3076           }
3077           for (; dLocal < stag->dof[0] + 2 * stag->dof[1]; ++d, ++dLocal, ++count) {                                                   /* left back edge */
3078             idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * epFaceRow + i * entriesPerFace + d; /* Note i increment by face */
3079           }
3080           for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining */
3081             idxGlobalAll[count] = -1;
3082           }
3083           ++i;
3084           for (; i < stag->n[0] + ghostOffsetEnd[0]; ++i) {
3085             for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
3086           }
3087         } else {
3088           /* Right Up Front dummies */
3089           PetscInt i;
3090           /* Right Down Front dummies */
3091           for (i = 0; i < ghostOffsetEnd[0]; ++i) {
3092             for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
3093           }
3094         }
3095       }
3096     } else {
3097       /* Up Front partial dummy row */
3098       PetscInt j = stag->n[1];
3099 
3100       /* Up Front partial dummy row: columns 1/3: Left Up Front, on Left (Middle) (Middle) */
3101       if (!dummyStart[0]) {
3102         const PetscInt neighbor  = 12;
3103         const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0];
3104         for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
3105           const PetscInt i = nNeighbors[neighbor][0] - ghostOffsetStart[0] + ighost;
3106           PetscInt       dLocal;
3107           for (d = 0, dLocal = 0; dLocal < stag->dof[0] + stag->dof[1]; ++d, ++dLocal, ++count) {                                      /* Vertex  and down back edge */
3108             idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * epFaceRow + i * entriesPerEdge + d; /* Note increments */
3109           }
3110           for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining */
3111             idxGlobalAll[count] = -1;
3112           }
3113         }
3114       } else {
3115         for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
3116           for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
3117         }
3118       }
3119 
3120       /* Up Front partial dummy row: columns 2/3: (Middle) Up Front, on (Middle) (Middle) (Middle) */
3121       {
3122         const PetscInt neighbor  = 13;
3123         const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerEdge : 0); /* We may be a right boundary */
3124         for (ighost = ghostOffsetStart[0]; ighost < stag->nGhost[0] - ghostOffsetEnd[0]; ++ighost) {
3125           const PetscInt i = ighost - ghostOffsetStart[0];
3126           PetscInt       dLocal;
3127           for (d = 0, dLocal = 0; dLocal < stag->dof[0] + stag->dof[1]; ++d, ++dLocal, ++count) {                                      /* Vertex  and down back edge */
3128             idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * epFaceRow + i * entriesPerEdge + d; /* Note increments */
3129           }
3130           for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining */
3131             idxGlobalAll[count] = -1;
3132           }
3133         }
3134       }
3135 
3136       if (!dummyEnd[0]) {
3137         /* Up Front partial dummy row: columns 3/3: Right Up Front, on Right (Middle) (Middle) */
3138         const PetscInt neighbor  = 14;
3139         const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0] + (nextToDummyEnd[0] ? entriesPerEdge : 0); /* Neighbor may be a right boundary */
3140         PetscInt       i;
3141         for (i = 0; i < ghostOffsetEnd[0]; ++i) {
3142           PetscInt dLocal;
3143           for (d = 0, dLocal = 0; dLocal < stag->dof[0] + stag->dof[1]; ++d, ++dLocal, ++count) {                                      /* Vertex  and down back edge */
3144             idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * epFaceRow + i * entriesPerEdge + d; /* Note increments */
3145           }
3146           for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining */
3147             idxGlobalAll[count] = -1;
3148           }
3149         }
3150       } else {
3151         /* Right Up Front partial dummy element, on this rank */
3152         const PetscInt neighbor  = 13;
3153         const PetscInt epFaceRow = entriesPerFace * nNeighbors[neighbor][0] + (dummyEnd[0] ? entriesPerEdge : 0); /* We may be a right boundary */
3154         PetscInt       i, dLocal;
3155         i = stag->n[0];
3156         for (d = 0, dLocal = 0; dLocal < stag->dof[0]; ++d, ++dLocal, ++count) {                                                     /* Vertex */
3157           idxGlobalAll[count] = globalOffsetNeighbor[neighbor] + k * eplNeighbor[neighbor] + j * epFaceRow + i * entriesPerEdge + d; /* Note increments */
3158         }
3159         for (; dLocal < stag->entriesPerElement; ++dLocal, ++count) { /* Dummies on remaining */
3160           idxGlobalAll[count] = -1;
3161         }
3162         ++i;
3163         for (; i < stag->n[0] + ghostOffsetEnd[0]; ++i) {
3164           for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
3165         }
3166       }
3167       ++j;
3168       /* Up Front additional dummy rows */
3169       for (; j < stag->n[1] + ghostOffsetEnd[1]; ++j) {
3170         for (ighost = 0; ighost < stag->nGhost[0]; ++ighost) {
3171           for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
3172         }
3173       }
3174     }
3175     /* Front additional dummy layers */
3176     ++k;
3177     for (; k < stag->n[2] + ghostOffsetEnd[2]; ++k) {
3178       for (jghost = 0; jghost < stag->nGhost[1]; ++jghost) {
3179         for (ighost = 0; ighost < stag->nGhost[0]; ++ighost) {
3180           for (d = 0; d < stag->entriesPerElement; ++d, ++count) idxGlobalAll[count] = -1;
3181         }
3182       }
3183     }
3184   }
3185 
3186   /* Create local-to-global map (in local ordering, includes maps to -1 for dummy points) */
3187   PetscCall(ISLocalToGlobalMappingCreate(PetscObjectComm((PetscObject)dm), 1, stag->entriesGhost, idxGlobalAll, PETSC_OWN_POINTER, &dm->ltogmap));
3188   PetscFunctionReturn(PETSC_SUCCESS);
3189 }
3190 
DMStagComputeLocationOffsets_3d(DM dm)3191 static PetscErrorCode DMStagComputeLocationOffsets_3d(DM dm)
3192 {
3193   DM_Stag *const stag = (DM_Stag *)dm->data;
3194   const PetscInt epe  = stag->entriesPerElement;
3195   const PetscInt epr  = stag->nGhost[0] * epe;
3196   const PetscInt epl  = stag->nGhost[1] * epr;
3197 
3198   PetscFunctionBegin;
3199   PetscCall(PetscMalloc1(DMSTAG_NUMBER_LOCATIONS, &stag->locationOffsets));
3200   stag->locationOffsets[DMSTAG_BACK_DOWN_LEFT]   = 0;
3201   stag->locationOffsets[DMSTAG_BACK_DOWN]        = stag->locationOffsets[DMSTAG_BACK_DOWN_LEFT] + stag->dof[0];
3202   stag->locationOffsets[DMSTAG_BACK_DOWN_RIGHT]  = stag->locationOffsets[DMSTAG_BACK_DOWN_LEFT] + epe;
3203   stag->locationOffsets[DMSTAG_BACK_LEFT]        = stag->locationOffsets[DMSTAG_BACK_DOWN] + stag->dof[1];
3204   stag->locationOffsets[DMSTAG_BACK]             = stag->locationOffsets[DMSTAG_BACK_LEFT] + stag->dof[1];
3205   stag->locationOffsets[DMSTAG_BACK_RIGHT]       = stag->locationOffsets[DMSTAG_BACK_LEFT] + epe;
3206   stag->locationOffsets[DMSTAG_BACK_UP_LEFT]     = stag->locationOffsets[DMSTAG_BACK_DOWN_LEFT] + epr;
3207   stag->locationOffsets[DMSTAG_BACK_UP]          = stag->locationOffsets[DMSTAG_BACK_DOWN] + epr;
3208   stag->locationOffsets[DMSTAG_BACK_UP_RIGHT]    = stag->locationOffsets[DMSTAG_BACK_UP_LEFT] + epe;
3209   stag->locationOffsets[DMSTAG_DOWN_LEFT]        = stag->locationOffsets[DMSTAG_BACK] + stag->dof[2];
3210   stag->locationOffsets[DMSTAG_DOWN]             = stag->locationOffsets[DMSTAG_DOWN_LEFT] + stag->dof[1];
3211   stag->locationOffsets[DMSTAG_DOWN_RIGHT]       = stag->locationOffsets[DMSTAG_DOWN_LEFT] + epe;
3212   stag->locationOffsets[DMSTAG_LEFT]             = stag->locationOffsets[DMSTAG_DOWN] + stag->dof[2];
3213   stag->locationOffsets[DMSTAG_ELEMENT]          = stag->locationOffsets[DMSTAG_LEFT] + stag->dof[2];
3214   stag->locationOffsets[DMSTAG_RIGHT]            = stag->locationOffsets[DMSTAG_LEFT] + epe;
3215   stag->locationOffsets[DMSTAG_UP_LEFT]          = stag->locationOffsets[DMSTAG_DOWN_LEFT] + epr;
3216   stag->locationOffsets[DMSTAG_UP]               = stag->locationOffsets[DMSTAG_DOWN] + epr;
3217   stag->locationOffsets[DMSTAG_UP_RIGHT]         = stag->locationOffsets[DMSTAG_UP_LEFT] + epe;
3218   stag->locationOffsets[DMSTAG_FRONT_DOWN_LEFT]  = stag->locationOffsets[DMSTAG_BACK_DOWN_LEFT] + epl;
3219   stag->locationOffsets[DMSTAG_FRONT_DOWN]       = stag->locationOffsets[DMSTAG_BACK_DOWN] + epl;
3220   stag->locationOffsets[DMSTAG_FRONT_DOWN_RIGHT] = stag->locationOffsets[DMSTAG_FRONT_DOWN_LEFT] + epe;
3221   stag->locationOffsets[DMSTAG_FRONT_LEFT]       = stag->locationOffsets[DMSTAG_BACK_LEFT] + epl;
3222   stag->locationOffsets[DMSTAG_FRONT]            = stag->locationOffsets[DMSTAG_BACK] + epl;
3223   stag->locationOffsets[DMSTAG_FRONT_RIGHT]      = stag->locationOffsets[DMSTAG_FRONT_LEFT] + epe;
3224   stag->locationOffsets[DMSTAG_FRONT_UP_LEFT]    = stag->locationOffsets[DMSTAG_FRONT_DOWN_LEFT] + epr;
3225   stag->locationOffsets[DMSTAG_FRONT_UP]         = stag->locationOffsets[DMSTAG_FRONT_DOWN] + epr;
3226   stag->locationOffsets[DMSTAG_FRONT_UP_RIGHT]   = stag->locationOffsets[DMSTAG_FRONT_UP_LEFT] + epe;
3227   PetscFunctionReturn(PETSC_SUCCESS);
3228 }
3229 
DMStagPopulateLocalToGlobalInjective_3d(DM dm)3230 PETSC_INTERN PetscErrorCode DMStagPopulateLocalToGlobalInjective_3d(DM dm)
3231 {
3232   DM_Stag *const  stag = (DM_Stag *)dm->data;
3233   PetscInt       *idxLocal, *idxGlobal, *globalOffsetsRecomputed;
3234   const PetscInt *globalOffsets;
3235   PetscInt        count, d, entriesPerEdge, entriesPerFace, eprGhost, eplGhost, ghostOffsetStart[3], ghostOffsetEnd[3];
3236   IS              isLocal, isGlobal;
3237   PetscBool       dummyEnd[3];
3238 
3239   PetscFunctionBegin;
3240   PetscCall(DMStagSetUpBuildGlobalOffsets_3d(dm, &globalOffsetsRecomputed)); /* note that we don't actually use all of these. An available optimization is to pass them, when available */
3241   globalOffsets = globalOffsetsRecomputed;
3242   PetscCall(PetscMalloc1(stag->entries, &idxLocal));
3243   PetscCall(PetscMalloc1(stag->entries, &idxGlobal));
3244   for (d = 0; d < 3; ++d) dummyEnd[d] = (PetscBool)(stag->lastRank[d] && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);
3245   entriesPerFace = stag->dof[0] + 2 * stag->dof[1] + stag->dof[2];
3246   entriesPerEdge = stag->dof[0] + stag->dof[1];
3247   eprGhost       = stag->nGhost[0] * stag->entriesPerElement; /* epr = entries per (element) row   */
3248   eplGhost       = stag->nGhost[1] * eprGhost;                /* epl = entries per (element) layer */
3249   count          = 0;
3250   for (d = 0; d < 3; ++d) ghostOffsetStart[d] = stag->start[d] - stag->startGhost[d];
3251   for (d = 0; d < 3; ++d) ghostOffsetEnd[d] = stag->startGhost[d] + stag->nGhost[d] - (stag->start[d] + stag->n[d]);
3252   {
3253     const PetscInt  neighbor    = 13;
3254     const PetscInt  epr         = stag->entriesPerElement * stag->n[0] + (dummyEnd[0] ? entriesPerFace : 0);                               /* We may be a right boundary */
3255     const PetscInt  epl         = epr * stag->n[1] + (dummyEnd[1] ? stag->n[0] * entriesPerFace + (dummyEnd[0] ? entriesPerEdge : 0) : 0); /* We may be a top boundary */
3256     const PetscInt  epFaceRow   = entriesPerFace * stag->n[0] + (dummyEnd[0] ? entriesPerEdge : 0);                                        /* We may be a right boundary */
3257     const PetscInt  start0      = 0;
3258     const PetscInt  start1      = 0;
3259     const PetscInt  start2      = 0;
3260     const PetscInt  startGhost0 = ghostOffsetStart[0];
3261     const PetscInt  startGhost1 = ghostOffsetStart[1];
3262     const PetscInt  startGhost2 = ghostOffsetStart[2];
3263     const PetscInt  endGhost0   = stag->nGhost[0] - ghostOffsetEnd[0];
3264     const PetscInt  endGhost1   = stag->nGhost[1] - ghostOffsetEnd[1];
3265     const PetscInt  endGhost2   = stag->nGhost[2] - ghostOffsetEnd[2];
3266     const PetscBool extra0      = dummyEnd[0];
3267     const PetscBool extra1      = dummyEnd[1];
3268     const PetscBool extra2      = dummyEnd[2];
3269     PetscCall(DMStagSetUpBuildScatterPopulateIdx_3d(stag, &count, idxLocal, idxGlobal, entriesPerEdge, entriesPerFace, epr, epl, eprGhost, eplGhost, epFaceRow, globalOffsets[stag->neighbors[neighbor]], start0, start1, start2, startGhost0, startGhost1, startGhost2, endGhost0, endGhost1, endGhost2, extra0, extra1, extra2));
3270   }
3271   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)dm), stag->entries, idxLocal, PETSC_OWN_POINTER, &isLocal));
3272   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)dm), stag->entries, idxGlobal, PETSC_OWN_POINTER, &isGlobal));
3273   {
3274     Vec local, global;
3275     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)dm), 1, stag->entries, PETSC_DECIDE, NULL, &global));
3276     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, PetscMax(stag->entriesPerElement, 1), stag->entriesGhost, NULL, &local));
3277     PetscCall(VecScatterCreate(local, isLocal, global, isGlobal, &stag->ltog_injective));
3278     PetscCall(VecDestroy(&global));
3279     PetscCall(VecDestroy(&local));
3280   }
3281   PetscCall(ISDestroy(&isLocal));
3282   PetscCall(ISDestroy(&isGlobal));
3283   if (globalOffsetsRecomputed) PetscCall(PetscFree(globalOffsetsRecomputed));
3284   PetscFunctionReturn(PETSC_SUCCESS);
3285 }
3286 
DMStagPopulateLocalToLocal3d_Internal(DM dm)3287 PETSC_INTERN PetscErrorCode DMStagPopulateLocalToLocal3d_Internal(DM dm)
3288 {
3289   DM_Stag *const stag = (DM_Stag *)dm->data;
3290   PetscInt      *idxRemap;
3291   PetscBool      dummyEnd[3];
3292   PetscInt       i, j, k, d, count, leftGhostElements, downGhostElements, backGhostElements, iOffset, jOffset, kOffset;
3293   PetscInt       entriesPerRowGhost, entriesPerRowColGhost;
3294   PetscInt       dOffset[8] = {0};
3295 
3296   PetscFunctionBegin;
3297   PetscCall(VecScatterCopy(stag->gtol, &stag->ltol));
3298   PetscCall(PetscMalloc1(stag->entries, &idxRemap));
3299 
3300   for (d = 0; d < 3; ++d) dummyEnd[d] = (PetscBool)(stag->lastRank[d] && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);
3301   leftGhostElements     = stag->start[0] - stag->startGhost[0];
3302   downGhostElements     = stag->start[1] - stag->startGhost[1];
3303   backGhostElements     = stag->start[2] - stag->startGhost[2];
3304   entriesPerRowGhost    = stag->nGhost[0] * stag->entriesPerElement;
3305   entriesPerRowColGhost = stag->nGhost[0] * stag->nGhost[1] * stag->entriesPerElement;
3306   dOffset[1]            = dOffset[0] + stag->dof[0];
3307   dOffset[2]            = dOffset[1] + stag->dof[1];
3308   dOffset[3]            = dOffset[2] + stag->dof[1];
3309   dOffset[4]            = dOffset[3] + stag->dof[2];
3310   dOffset[5]            = dOffset[4] + stag->dof[1];
3311   dOffset[6]            = dOffset[5] + stag->dof[2];
3312   dOffset[7]            = dOffset[6] + stag->dof[2];
3313 
3314   count = 0;
3315   for (k = 0; k < stag->n[2]; ++k) {
3316     kOffset = entriesPerRowColGhost * (backGhostElements + k);
3317     for (j = 0; j < stag->n[1]; ++j) {
3318       jOffset = entriesPerRowGhost * (downGhostElements + j);
3319       for (i = 0; i < stag->n[0]; ++i) {
3320         iOffset = stag->entriesPerElement * (leftGhostElements + i);
3321         // all
3322         for (d = 0; d < stag->entriesPerElement; ++d) idxRemap[count++] = kOffset + jOffset + iOffset + d;
3323       }
3324       if (dummyEnd[0]) {
3325         iOffset = stag->entriesPerElement * (leftGhostElements + stag->n[0]);
3326         // back down left, back left, down left, left
3327         for (d = 0; d < stag->dof[0]; ++d) idxRemap[count++] = kOffset + jOffset + iOffset + dOffset[0] + d;
3328         for (d = 0; d < stag->dof[1]; ++d) idxRemap[count++] = kOffset + jOffset + iOffset + dOffset[2] + d;
3329         for (d = 0; d < stag->dof[1]; ++d) idxRemap[count++] = kOffset + jOffset + iOffset + dOffset[4] + d;
3330         for (d = 0; d < stag->dof[2]; ++d) idxRemap[count++] = kOffset + jOffset + iOffset + dOffset[6] + d;
3331       }
3332     }
3333     if (dummyEnd[1]) {
3334       jOffset = entriesPerRowGhost * (downGhostElements + stag->n[1]);
3335       for (i = 0; i < stag->n[0]; ++i) {
3336         iOffset = stag->entriesPerElement * (leftGhostElements + i);
3337         // back down left, back down, down left, down
3338         for (d = 0; d < stag->dof[0]; ++d) idxRemap[count++] = kOffset + jOffset + iOffset + dOffset[0] + d;
3339         for (d = 0; d < stag->dof[1]; ++d) idxRemap[count++] = kOffset + jOffset + iOffset + dOffset[1] + d;
3340         for (d = 0; d < stag->dof[1]; ++d) idxRemap[count++] = kOffset + jOffset + iOffset + dOffset[4] + d;
3341         for (d = 0; d < stag->dof[2]; ++d) idxRemap[count++] = kOffset + jOffset + iOffset + dOffset[5] + d;
3342       }
3343       if (dummyEnd[0]) {
3344         iOffset = stag->entriesPerElement * (leftGhostElements + stag->n[0]);
3345         // back down left, down left
3346         for (d = 0; d < stag->dof[0]; ++d) idxRemap[count++] = kOffset + jOffset + iOffset + dOffset[0] + d;
3347         for (d = 0; d < stag->dof[1]; ++d) idxRemap[count++] = kOffset + jOffset + iOffset + dOffset[4] + d;
3348       }
3349     }
3350   }
3351   if (dummyEnd[2]) {
3352     kOffset = entriesPerRowColGhost * (backGhostElements + stag->n[2]);
3353     for (j = 0; j < stag->n[1]; ++j) {
3354       jOffset = entriesPerRowGhost * (downGhostElements + j);
3355       for (i = 0; i < stag->n[0]; ++i) {
3356         iOffset = stag->entriesPerElement * (leftGhostElements + i);
3357         // back down left, back down, back left, back
3358         for (d = 0; d < stag->dof[0]; ++d) idxRemap[count++] = kOffset + jOffset + iOffset + dOffset[0] + d;
3359         for (d = 0; d < stag->dof[1]; ++d) idxRemap[count++] = kOffset + jOffset + iOffset + dOffset[1] + d;
3360         for (d = 0; d < stag->dof[1]; ++d) idxRemap[count++] = kOffset + jOffset + iOffset + dOffset[2] + d;
3361         for (d = 0; d < stag->dof[2]; ++d) idxRemap[count++] = kOffset + jOffset + iOffset + dOffset[3] + d;
3362       }
3363       if (dummyEnd[0]) {
3364         iOffset = stag->entriesPerElement * (leftGhostElements + stag->n[0]);
3365         // back down left, back left
3366         for (d = 0; d < stag->dof[0]; ++d) idxRemap[count++] = kOffset + jOffset + iOffset + dOffset[0] + d;
3367         for (d = 0; d < stag->dof[1]; ++d) idxRemap[count++] = kOffset + jOffset + iOffset + dOffset[2] + d;
3368       }
3369     }
3370     if (dummyEnd[1]) {
3371       jOffset = entriesPerRowGhost * (downGhostElements + stag->n[1]);
3372       for (i = 0; i < stag->n[0]; ++i) {
3373         iOffset = stag->entriesPerElement * (leftGhostElements + i);
3374         // back down left, back down
3375         for (d = 0; d < stag->dof[0]; ++d) idxRemap[count++] = kOffset + jOffset + iOffset + dOffset[0] + d;
3376         for (d = 0; d < stag->dof[1]; ++d) idxRemap[count++] = kOffset + jOffset + iOffset + dOffset[1] + d;
3377       }
3378       if (dummyEnd[0]) {
3379         iOffset = stag->entriesPerElement * (leftGhostElements + stag->n[0]);
3380         // back down left
3381         for (d = 0; d < stag->dof[0]; ++d) idxRemap[count++] = kOffset + jOffset + iOffset + dOffset[0] + d;
3382       }
3383     }
3384   }
3385 
3386   PetscCheck(count == stag->entries, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of entries computed in ltol (%" PetscInt_FMT ") is not as expected (%" PetscInt_FMT ")", count, stag->entries);
3387 
3388   PetscCall(VecScatterRemap(stag->ltol, idxRemap, NULL));
3389   PetscCall(PetscFree(idxRemap));
3390   PetscFunctionReturn(PETSC_SUCCESS);
3391 }
3392 
DMCreateMatrix_Stag_3D_AIJ_Assemble(DM dm,Mat A)3393 PETSC_INTERN PetscErrorCode DMCreateMatrix_Stag_3D_AIJ_Assemble(DM dm, Mat A)
3394 {
3395   PetscInt          dof[DMSTAG_MAX_STRATA], epe, stencil_width, N[3], start[3], n[3], n_extra[3];
3396   DMStagStencilType stencil_type;
3397   DMBoundaryType    boundary_type[3];
3398 
3399   /* This implementation gives a very dense stencil, which is likely unsuitable for
3400      (typical) applications which have fewer couplings */
3401   PetscFunctionBegin;
3402   PetscCall(DMStagGetDOF(dm, &dof[0], &dof[1], &dof[2], &dof[3]));
3403   PetscCall(DMStagGetStencilType(dm, &stencil_type));
3404   PetscCall(DMStagGetStencilWidth(dm, &stencil_width));
3405   PetscCall(DMStagGetEntriesPerElement(dm, &epe));
3406   PetscCall(DMStagGetCorners(dm, &start[0], &start[1], &start[2], &n[0], &n[1], &n[2], &n_extra[0], &n_extra[1], &n_extra[2]));
3407   PetscCall(DMStagGetGlobalSizes(dm, &N[0], &N[1], &N[2]));
3408   PetscCall(DMStagGetBoundaryTypes(dm, &boundary_type[0], &boundary_type[1], &boundary_type[2]));
3409 
3410   if (stencil_type == DMSTAG_STENCIL_NONE) {
3411     /* Couple all DOF at each location to each other */
3412     DMStagStencil *row_vertex, *row_edge_down_left, *row_edge_back_down, *row_edge_back_left, *row_face_down, *row_face_left, *row_face_back, *row_element;
3413 
3414     PetscCall(PetscMalloc1(dof[0], &row_vertex));
3415     for (PetscInt c = 0; c < dof[0]; ++c) {
3416       row_vertex[c].loc = DMSTAG_BACK_DOWN_LEFT;
3417       row_vertex[c].c   = c;
3418     }
3419 
3420     PetscCall(PetscMalloc1(dof[1], &row_edge_down_left));
3421     for (PetscInt c = 0; c < dof[1]; ++c) {
3422       row_edge_down_left[c].loc = DMSTAG_DOWN_LEFT;
3423       row_edge_down_left[c].c   = c;
3424     }
3425 
3426     PetscCall(PetscMalloc1(dof[1], &row_edge_back_left));
3427     for (PetscInt c = 0; c < dof[1]; ++c) {
3428       row_edge_back_left[c].loc = DMSTAG_BACK_LEFT;
3429       row_edge_back_left[c].c   = c;
3430     }
3431 
3432     PetscCall(PetscMalloc1(dof[1], &row_edge_back_down));
3433     for (PetscInt c = 0; c < dof[1]; ++c) {
3434       row_edge_back_down[c].loc = DMSTAG_BACK_DOWN;
3435       row_edge_back_down[c].c   = c;
3436     }
3437 
3438     PetscCall(PetscMalloc1(dof[2], &row_face_left));
3439     for (PetscInt c = 0; c < dof[2]; ++c) {
3440       row_face_left[c].loc = DMSTAG_LEFT;
3441       row_face_left[c].c   = c;
3442     }
3443 
3444     PetscCall(PetscMalloc1(dof[2], &row_face_down));
3445     for (PetscInt c = 0; c < dof[2]; ++c) {
3446       row_face_down[c].loc = DMSTAG_DOWN;
3447       row_face_down[c].c   = c;
3448     }
3449 
3450     PetscCall(PetscMalloc1(dof[2], &row_face_back));
3451     for (PetscInt c = 0; c < dof[2]; ++c) {
3452       row_face_back[c].loc = DMSTAG_BACK;
3453       row_face_back[c].c   = c;
3454     }
3455 
3456     PetscCall(PetscMalloc1(dof[3], &row_element));
3457     for (PetscInt c = 0; c < dof[3]; ++c) {
3458       row_element[c].loc = DMSTAG_ELEMENT;
3459       row_element[c].c   = c;
3460     }
3461 
3462     for (PetscInt ez = start[2]; ez < start[2] + n[2] + n_extra[2]; ++ez) {
3463       for (PetscInt ey = start[1]; ey < start[1] + n[1] + n_extra[1]; ++ey) {
3464         for (PetscInt ex = start[0]; ex < start[0] + n[0] + n_extra[0]; ++ex) {
3465           for (PetscInt c = 0; c < dof[0]; ++c) {
3466             row_vertex[c].i = ex;
3467             row_vertex[c].j = ey;
3468             row_vertex[c].k = ez;
3469           }
3470           PetscCall(DMStagMatSetValuesStencil(dm, A, dof[0], row_vertex, dof[0], row_vertex, NULL, INSERT_VALUES));
3471 
3472           if (ez < N[2]) {
3473             for (PetscInt c = 0; c < dof[1]; ++c) {
3474               row_edge_down_left[c].i = ex;
3475               row_edge_down_left[c].j = ey;
3476               row_edge_down_left[c].k = ez;
3477             }
3478             PetscCall(DMStagMatSetValuesStencil(dm, A, dof[1], row_edge_down_left, dof[1], row_edge_down_left, NULL, INSERT_VALUES));
3479           }
3480 
3481           if (ey < N[1]) {
3482             for (PetscInt c = 0; c < dof[1]; ++c) {
3483               row_edge_back_left[c].i = ex;
3484               row_edge_back_left[c].j = ey;
3485               row_edge_back_left[c].k = ez;
3486             }
3487             PetscCall(DMStagMatSetValuesStencil(dm, A, dof[1], row_edge_back_left, dof[1], row_edge_back_left, NULL, INSERT_VALUES));
3488           }
3489 
3490           if (ey < N[0]) {
3491             for (PetscInt c = 0; c < dof[1]; ++c) {
3492               row_edge_back_down[c].i = ex;
3493               row_edge_back_down[c].j = ey;
3494               row_edge_back_down[c].k = ez;
3495             }
3496             PetscCall(DMStagMatSetValuesStencil(dm, A, dof[1], row_edge_back_down, dof[1], row_edge_back_down, NULL, INSERT_VALUES));
3497           }
3498 
3499           if (ey < N[1] && ez < N[2]) {
3500             for (PetscInt c = 0; c < dof[2]; ++c) {
3501               row_face_left[c].i = ex;
3502               row_face_left[c].j = ey;
3503               row_face_left[c].k = ez;
3504             }
3505             PetscCall(DMStagMatSetValuesStencil(dm, A, dof[2], row_face_left, dof[2], row_face_left, NULL, INSERT_VALUES));
3506           }
3507 
3508           if (ex < N[0] && ez < N[2]) {
3509             for (PetscInt c = 0; c < dof[2]; ++c) {
3510               row_face_down[c].i = ex;
3511               row_face_down[c].j = ey;
3512               row_face_down[c].k = ez;
3513             }
3514             PetscCall(DMStagMatSetValuesStencil(dm, A, dof[2], row_face_down, dof[2], row_face_down, NULL, INSERT_VALUES));
3515           }
3516 
3517           if (ex < N[0] && ey < N[1]) {
3518             for (PetscInt c = 0; c < dof[2]; ++c) {
3519               row_face_back[c].i = ex;
3520               row_face_back[c].j = ey;
3521               row_face_back[c].k = ez;
3522             }
3523             PetscCall(DMStagMatSetValuesStencil(dm, A, dof[2], row_face_back, dof[2], row_face_back, NULL, INSERT_VALUES));
3524           }
3525 
3526           if (ex < N[0] && ey < N[1] && ez < N[2]) {
3527             for (PetscInt c = 0; c < dof[3]; ++c) {
3528               row_element[c].i = ex;
3529               row_element[c].j = ey;
3530               row_element[c].k = ez;
3531             }
3532             PetscCall(DMStagMatSetValuesStencil(dm, A, dof[3], row_element, dof[3], row_element, NULL, INSERT_VALUES));
3533           }
3534         }
3535       }
3536     }
3537     PetscCall(PetscFree(row_vertex));
3538     PetscCall(PetscFree(row_edge_back_left));
3539     PetscCall(PetscFree(row_edge_back_down));
3540     PetscCall(PetscFree(row_edge_down_left));
3541     PetscCall(PetscFree(row_face_left));
3542     PetscCall(PetscFree(row_face_back));
3543     PetscCall(PetscFree(row_face_down));
3544     PetscCall(PetscFree(row_element));
3545   } else if (stencil_type == DMSTAG_STENCIL_STAR || stencil_type == DMSTAG_STENCIL_BOX) {
3546     DMStagStencil *col, *row;
3547 
3548     PetscCall(PetscMalloc1(epe, &row));
3549     {
3550       PetscInt nrows = 0;
3551 
3552       for (PetscInt c = 0; c < dof[0]; ++c) {
3553         row[nrows].c   = c;
3554         row[nrows].loc = DMSTAG_BACK_DOWN_LEFT;
3555         ++nrows;
3556       }
3557       for (PetscInt c = 0; c < dof[1]; ++c) {
3558         row[nrows].c   = c;
3559         row[nrows].loc = DMSTAG_DOWN_LEFT;
3560         ++nrows;
3561       }
3562       for (PetscInt c = 0; c < dof[1]; ++c) {
3563         row[nrows].c   = c;
3564         row[nrows].loc = DMSTAG_BACK_LEFT;
3565         ++nrows;
3566       }
3567       for (PetscInt c = 0; c < dof[1]; ++c) {
3568         row[nrows].c   = c;
3569         row[nrows].loc = DMSTAG_BACK_DOWN;
3570         ++nrows;
3571       }
3572       for (PetscInt c = 0; c < dof[2]; ++c) {
3573         row[nrows].c   = c;
3574         row[nrows].loc = DMSTAG_LEFT;
3575         ++nrows;
3576       }
3577       for (PetscInt c = 0; c < dof[2]; ++c) {
3578         row[nrows].c   = c;
3579         row[nrows].loc = DMSTAG_DOWN;
3580         ++nrows;
3581       }
3582       for (PetscInt c = 0; c < dof[2]; ++c) {
3583         row[nrows].c   = c;
3584         row[nrows].loc = DMSTAG_BACK;
3585         ++nrows;
3586       }
3587       for (PetscInt c = 0; c < dof[3]; ++c) {
3588         row[nrows].c   = c;
3589         row[nrows].loc = DMSTAG_ELEMENT;
3590         ++nrows;
3591       }
3592     }
3593 
3594     PetscCall(PetscMalloc1(epe, &col));
3595     {
3596       PetscInt ncols = 0;
3597 
3598       for (PetscInt c = 0; c < dof[0]; ++c) {
3599         col[ncols].c   = c;
3600         col[ncols].loc = DMSTAG_BACK_DOWN_LEFT;
3601         ++ncols;
3602       }
3603       for (PetscInt c = 0; c < dof[1]; ++c) {
3604         col[ncols].c   = c;
3605         col[ncols].loc = DMSTAG_DOWN_LEFT;
3606         ++ncols;
3607       }
3608       for (PetscInt c = 0; c < dof[1]; ++c) {
3609         col[ncols].c   = c;
3610         col[ncols].loc = DMSTAG_BACK_LEFT;
3611         ++ncols;
3612       }
3613       for (PetscInt c = 0; c < dof[1]; ++c) {
3614         col[ncols].c   = c;
3615         col[ncols].loc = DMSTAG_BACK_DOWN;
3616         ++ncols;
3617       }
3618       for (PetscInt c = 0; c < dof[2]; ++c) {
3619         col[ncols].c   = c;
3620         col[ncols].loc = DMSTAG_LEFT;
3621         ++ncols;
3622       }
3623       for (PetscInt c = 0; c < dof[2]; ++c) {
3624         col[ncols].c   = c;
3625         col[ncols].loc = DMSTAG_DOWN;
3626         ++ncols;
3627       }
3628       for (PetscInt c = 0; c < dof[2]; ++c) {
3629         col[ncols].c   = c;
3630         col[ncols].loc = DMSTAG_BACK;
3631         ++ncols;
3632       }
3633       for (PetscInt c = 0; c < dof[3]; ++c) {
3634         col[ncols].c   = c;
3635         col[ncols].loc = DMSTAG_ELEMENT;
3636         ++ncols;
3637       }
3638     }
3639 
3640     for (PetscInt ez = start[2]; ez < start[2] + n[2] + n_extra[2]; ++ez) {
3641       for (PetscInt ey = start[1]; ey < start[1] + n[1] + n_extra[1]; ++ey) {
3642         for (PetscInt ex = start[0]; ex < start[0] + n[0] + n_extra[0]; ++ex) {
3643           for (PetscInt i = 0; i < epe; ++i) {
3644             row[i].i = ex;
3645             row[i].j = ey;
3646             row[i].k = ez;
3647           }
3648           for (PetscInt offset_z = -stencil_width; offset_z <= stencil_width; ++offset_z) {
3649             const PetscInt ez_offset = ez + offset_z;
3650             for (PetscInt offset_y = -stencil_width; offset_y <= stencil_width; ++offset_y) {
3651               const PetscInt ey_offset = ey + offset_y;
3652               for (PetscInt offset_x = -stencil_width; offset_x <= stencil_width; ++offset_x) {
3653                 const PetscInt  ex_offset     = ex + offset_x;
3654                 const PetscBool is_star_point = (PetscBool)(((offset_x == 0) && (offset_y == 0 || offset_z == 0)) || (offset_y == 0 && offset_z == 0));
3655                 /* Only set values corresponding to elements which can have non-dummy entries,
3656                    meaning those that map to unknowns in the global representation. In the periodic
3657                    case, this is the entire stencil, but in all other cases, only includes a single
3658                    "extra" element which is partially outside the physical domain (those points in the
3659                    global representation */
3660                 if ((stencil_type == DMSTAG_STENCIL_BOX || is_star_point) && (boundary_type[0] == DM_BOUNDARY_PERIODIC || (ex_offset < N[0] + 1 && ex_offset >= 0)) && (boundary_type[1] == DM_BOUNDARY_PERIODIC || (ey_offset < N[1] + 1 && ey_offset >= 0)) && (boundary_type[2] == DM_BOUNDARY_PERIODIC || (ez_offset < N[2] + 1 && ez_offset >= 0))) {
3661                   for (PetscInt i = 0; i < epe; ++i) {
3662                     col[i].i = ex_offset;
3663                     col[i].j = ey_offset;
3664                     col[i].k = ez_offset;
3665                   }
3666                   PetscCall(DMStagMatSetValuesStencil(dm, A, epe, row, epe, col, NULL, INSERT_VALUES));
3667                 }
3668               }
3669             }
3670           }
3671         }
3672       }
3673     }
3674     PetscCall(PetscFree(row));
3675     PetscCall(PetscFree(col));
3676   } else SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Unsupported stencil type %s", DMStagStencilTypes[stencil_type]);
3677   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
3678   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
3679   PetscFunctionReturn(PETSC_SUCCESS);
3680 }
3681