1 /* Functions specific to the 2-dimensional implementation of DMStag */
2 #include <petsc/private/dmstagimpl.h> /*I "petscdmstag.h" I*/
3
4 /*@
5 DMStagCreate2d - Create an object to manage data living on the elements, faces, and vertices of a parallelized regular 2D grid.
6
7 Collective
8
9 Input Parameters:
10 + comm - MPI communicator
11 . bndx - x boundary type, `DM_BOUNDARY_NONE`, `DM_BOUNDARY_PERIODIC`, or
12 `DM_BOUNDARY_GHOSTED`
13 . bndy - y boundary type, `DM_BOUNDARY_NONE`, `DM_BOUNDARY_PERIODIC`, or `DM_BOUNDARY_GHOSTED`
14 . M - global number of elements in x direction
15 . N - global number of elements in y direction
16 . m - number of ranks in the x direction (may be `PETSC_DECIDE`)
17 . n - number of ranks in the y direction (may be `PETSC_DECIDE`)
18 . dof0 - number of degrees of freedom per vertex/0-cell
19 . dof1 - number of degrees of freedom per face/1-cell
20 . dof2 - number of degrees of freedom per element/2-cell
21 . stencilType - ghost/halo region type: `DMSTAG_STENCIL_NONE`, `DMSTAG_STENCIL_BOX`, or `DMSTAG_STENCIL_STAR`
22 . stencilWidth - width, in elements, of halo/ghost region
23 . lx - array of local x element counts, of length equal to `m`, summing to `M`, or `NULL`
24 - ly - array of local y element counts, of length equal to `n`, summing to `N`, or `NULL`
25
26 Output Parameter:
27 . dm - the new `DMSTAG` object
28
29 Options Database Keys:
30 + -dm_view - calls `DMViewFromOptions()` at the conclusion of `DMSetUp()`
31 . -stag_grid_x <nx> - number of elements in the x direction
32 . -stag_grid_y <ny> - number of elements in the y direction
33 . -stag_ranks_x <rx> - number of ranks in the x direction
34 . -stag_ranks_y <ry> - number of ranks in the y direction
35 . -stag_ghost_stencil_width - width of ghost region, in elements
36 . -stag_boundary_type_x <none,ghosted,periodic> - `DMBoundaryType` value
37 - -stag_boundary_type_y <none,ghosted,periodic> - `DMBoundaryType` value
38
39 Level: beginner
40
41 Notes:
42 You must call `DMSetUp()` after this call, before using the `DM`.
43 If you wish to use the options database (see the keys above) to change values in the `DMSTAG`, you must call
44 `DMSetFromOptions()` after this function but before `DMSetUp()`.
45
46 .seealso: [](ch_stag), `DMSTAG`, `DMStagCreate1d()`, `DMStagCreate3d()`, `DMDestroy()`, `DMView()`, `DMCreateGlobalVector()`, `DMCreateLocalVector()`, `DMLocalToGlobalBegin()`, `DMDACreate2d()`
47 @*/
DMStagCreate2d(MPI_Comm comm,DMBoundaryType bndx,DMBoundaryType bndy,PetscInt M,PetscInt N,PetscInt m,PetscInt n,PetscInt dof0,PetscInt dof1,PetscInt dof2,DMStagStencilType stencilType,PetscInt stencilWidth,const PetscInt lx[],const PetscInt ly[],DM * dm)48 PetscErrorCode DMStagCreate2d(MPI_Comm comm, DMBoundaryType bndx, DMBoundaryType bndy, PetscInt M, PetscInt N, PetscInt m, PetscInt n, PetscInt dof0, PetscInt dof1, PetscInt dof2, DMStagStencilType stencilType, PetscInt stencilWidth, const PetscInt lx[], const PetscInt ly[], DM *dm)
49 {
50 PetscFunctionBegin;
51 PetscCall(DMCreate(comm, dm));
52 PetscCall(DMSetDimension(*dm, 2));
53 PetscCall(DMStagInitialize(bndx, bndy, DM_BOUNDARY_NONE, M, N, 0, m, n, 0, dof0, dof1, dof2, 0, stencilType, stencilWidth, lx, ly, NULL, *dm));
54 PetscFunctionReturn(PETSC_SUCCESS);
55 }
56
DMStagRestrictSimple_2d(DM dmf,Vec xf_local,DM dmc,Vec xc_local)57 PETSC_INTERN PetscErrorCode DMStagRestrictSimple_2d(DM dmf, Vec xf_local, DM dmc, Vec xc_local)
58 {
59 PetscInt Mf, Nf, Mc, Nc, factorx, factory, dof[3];
60 PetscInt xc, yc, mc, nc, nExtraxc, nExtrayc, i, j, d;
61 PetscInt idownleftf, ileftf, idownf, ielemf, idownleftc, ileftc, idownc, ielemc;
62 const PetscScalar ***arrf;
63 PetscScalar ***arrc;
64
65 PetscFunctionBegin;
66 PetscCall(DMStagGetGlobalSizes(dmf, &Mf, &Nf, NULL));
67 PetscCall(DMStagGetGlobalSizes(dmc, &Mc, &Nc, NULL));
68 factorx = Mf / Mc;
69 factory = Nf / Nc;
70 PetscCall(DMStagGetDOF(dmc, &dof[0], &dof[1], &dof[2], NULL));
71
72 PetscCall(DMStagGetCorners(dmc, &xc, &yc, NULL, &mc, &nc, NULL, &nExtraxc, &nExtrayc, NULL));
73 PetscCall(VecZeroEntries(xc_local));
74 PetscCall(DMStagVecGetArray(dmf, xf_local, &arrf));
75 PetscCall(DMStagVecGetArray(dmc, xc_local, &arrc));
76 PetscCall(DMStagGetLocationSlot(dmf, DMSTAG_DOWN_LEFT, 0, &idownleftf));
77 PetscCall(DMStagGetLocationSlot(dmf, DMSTAG_LEFT, 0, &ileftf));
78 PetscCall(DMStagGetLocationSlot(dmf, DMSTAG_DOWN, 0, &idownf));
79 PetscCall(DMStagGetLocationSlot(dmf, DMSTAG_ELEMENT, 0, &ielemf));
80 PetscCall(DMStagGetLocationSlot(dmc, DMSTAG_DOWN_LEFT, 0, &idownleftc));
81 PetscCall(DMStagGetLocationSlot(dmc, DMSTAG_LEFT, 0, &ileftc));
82 PetscCall(DMStagGetLocationSlot(dmc, DMSTAG_DOWN, 0, &idownc));
83 PetscCall(DMStagGetLocationSlot(dmc, DMSTAG_ELEMENT, 0, &ielemc));
84
85 for (d = 0; d < dof[0]; ++d)
86 for (j = yc; j < yc + nc + nExtrayc; ++j)
87 for (i = xc; i < xc + mc + nExtraxc; ++i) {
88 const PetscInt ii = factorx * i, jj = factory * j;
89
90 arrc[j][i][idownleftc + d] = arrf[jj][ii][idownleftf + d];
91 }
92
93 for (d = 0; d < dof[1]; ++d)
94 for (j = yc; j < yc + nc; ++j)
95 for (i = xc; i < xc + mc + nExtraxc; ++i) {
96 const PetscInt ii = factorx * i, jj = factory * j + factory / 2;
97
98 if (factory % 2 == 0) arrc[j][i][ileftc + d] = 0.5 * (arrf[jj - 1][ii][ileftf + d] + arrf[jj][ii][ileftf + d]);
99 else arrc[j][i][ileftc + d] = arrf[jj][ii][ileftf + d];
100 }
101
102 for (d = 0; d < dof[1]; ++d)
103 for (j = yc; j < yc + nc + nExtrayc; ++j)
104 for (i = xc; i < xc + mc; ++i) {
105 const PetscInt ii = factorx * i + factorx / 2, jj = factory * j;
106
107 if (factorx % 2 == 0) arrc[j][i][idownc + d] = 0.5 * (arrf[jj][ii - 1][idownf + d] + arrf[jj][ii][idownf + d]);
108 else arrc[j][i][idownc + d] = arrf[jj][ii][idownf + d];
109 }
110
111 for (d = 0; d < dof[2]; ++d)
112 for (j = yc; j < yc + nc; ++j)
113 for (i = xc; i < xc + mc; ++i) {
114 const PetscInt ii = factorx * i + factorx / 2, jj = factory * j + factory / 2;
115
116 if (factorx % 2 == 0 && factory % 2 == 0) arrc[j][i][ielemc + d] = 0.25 * (arrf[jj - 1][ii - 1][ielemf + d] + arrf[jj][ii - 1][ielemf + d] + arrf[jj - 1][ii][ielemf + d] + arrf[jj][ii][ielemf + d]);
117 else if (factorx % 2 == 0) arrc[j][i][ielemc + d] = 0.5 * (arrf[jj - 1][ii - 1][ielemf + d] + arrf[jj][ii - 1][ielemf + d]);
118 else if (factory % 2 == 0) arrc[j][i][ielemc + d] = 0.5 * (arrf[jj - 1][ii - 1][ielemf + d] + arrf[jj - 1][ii][ielemf + d]);
119 else arrc[j][i][ielemc + d] = arrf[jj][ii][ielemf + d];
120 }
121
122 PetscCall(DMStagVecRestoreArray(dmf, xf_local, &arrf));
123 PetscCall(DMStagVecRestoreArray(dmc, xc_local, &arrc));
124 PetscFunctionReturn(PETSC_SUCCESS);
125 }
126
DMStagSetUniformCoordinatesExplicit_2d(DM dm,PetscReal xmin,PetscReal xmax,PetscReal ymin,PetscReal ymax)127 PETSC_INTERN PetscErrorCode DMStagSetUniformCoordinatesExplicit_2d(DM dm, PetscReal xmin, PetscReal xmax, PetscReal ymin, PetscReal ymax)
128 {
129 DM_Stag *stagCoord;
130 DM dmCoord;
131 Vec coordLocal;
132 PetscReal h[2], min[2];
133 PetscScalar ***arr;
134 PetscInt ind[2], start_ghost[2], n_ghost[2], s, c;
135 PetscInt idownleft, idown, ileft, ielement;
136
137 PetscFunctionBegin;
138 PetscCall(DMGetCoordinateDM(dm, &dmCoord));
139 stagCoord = (DM_Stag *)dmCoord->data;
140 for (s = 0; s < 3; ++s) {
141 PetscCheck(stagCoord->dof[s] == 0 || stagCoord->dof[s] == 2, PetscObjectComm((PetscObject)dm), PETSC_ERR_PLIB, "Coordinate DM in 2 dimensions must have 0 or 2 dof on each stratum, but stratum %" PetscInt_FMT " has %" PetscInt_FMT " dof", s,
142 stagCoord->dof[s]);
143 }
144 PetscCall(DMCreateLocalVector(dmCoord, &coordLocal));
145
146 PetscCall(DMStagVecGetArray(dmCoord, coordLocal, &arr));
147 if (stagCoord->dof[0]) PetscCall(DMStagGetLocationSlot(dmCoord, DMSTAG_DOWN_LEFT, 0, &idownleft));
148 if (stagCoord->dof[1]) {
149 PetscCall(DMStagGetLocationSlot(dmCoord, DMSTAG_DOWN, 0, &idown));
150 PetscCall(DMStagGetLocationSlot(dmCoord, DMSTAG_LEFT, 0, &ileft));
151 }
152 if (stagCoord->dof[2]) PetscCall(DMStagGetLocationSlot(dmCoord, DMSTAG_ELEMENT, 0, &ielement));
153 PetscCall(DMStagGetGhostCorners(dmCoord, &start_ghost[0], &start_ghost[1], NULL, &n_ghost[0], &n_ghost[1], NULL));
154
155 min[0] = xmin;
156 min[1] = ymin;
157 h[0] = (xmax - xmin) / stagCoord->N[0];
158 h[1] = (ymax - ymin) / stagCoord->N[1];
159
160 for (ind[1] = start_ghost[1]; ind[1] < start_ghost[1] + n_ghost[1]; ++ind[1]) {
161 for (ind[0] = start_ghost[0]; ind[0] < start_ghost[0] + n_ghost[0]; ++ind[0]) {
162 if (stagCoord->dof[0]) {
163 const PetscReal offs[2] = {0.0, 0.0};
164 for (c = 0; c < 2; ++c) arr[ind[1]][ind[0]][idownleft + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
165 }
166 if (stagCoord->dof[1]) {
167 const PetscReal offs[2] = {0.5, 0.0};
168 for (c = 0; c < 2; ++c) arr[ind[1]][ind[0]][idown + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
169 }
170 if (stagCoord->dof[1]) {
171 const PetscReal offs[2] = {0.0, 0.5};
172 for (c = 0; c < 2; ++c) arr[ind[1]][ind[0]][ileft + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
173 }
174 if (stagCoord->dof[2]) {
175 const PetscReal offs[2] = {0.5, 0.5};
176 for (c = 0; c < 2; ++c) arr[ind[1]][ind[0]][ielement + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
177 }
178 }
179 }
180 PetscCall(DMStagVecRestoreArray(dmCoord, coordLocal, &arr));
181 PetscCall(DMSetCoordinatesLocal(dm, coordLocal));
182 PetscCall(VecDestroy(&coordLocal));
183 PetscFunctionReturn(PETSC_SUCCESS);
184 }
185
186 /* Helper functions used in DMSetUp_Stag() */
187 static PetscErrorCode DMStagSetUpBuildRankGrid_2d(DM);
188 static PetscErrorCode DMStagSetUpBuildNeighbors_2d(DM);
189 static PetscErrorCode DMStagSetUpBuildGlobalOffsets_2d(DM, PetscInt **);
190 static PetscErrorCode DMStagComputeLocationOffsets_2d(DM);
191
DMSetUp_Stag_2d(DM dm)192 PETSC_INTERN PetscErrorCode DMSetUp_Stag_2d(DM dm)
193 {
194 DM_Stag *const stag = (DM_Stag *)dm->data;
195 PetscMPIInt size, rank;
196 PetscInt i, j, d, entriesPerElementRowGhost, entriesPerCorner, entriesPerFace, entriesPerElementRow;
197 MPI_Comm comm;
198 PetscInt *globalOffsets;
199 PetscBool star, dummyStart[2], dummyEnd[2];
200 const PetscInt dim = 2;
201
202 PetscFunctionBegin;
203 PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
204 PetscCallMPI(MPI_Comm_size(comm, &size));
205 PetscCallMPI(MPI_Comm_rank(comm, &rank));
206
207 /* Rank grid sizes (populates stag->nRanks) */
208 PetscCall(DMStagSetUpBuildRankGrid_2d(dm));
209
210 /* Determine location of rank in grid (these get extra boundary points on the last element)
211 Order is x-fast, as usual */
212 stag->rank[0] = rank % stag->nRanks[0];
213 stag->rank[1] = rank / stag->nRanks[0];
214 for (i = 0; i < dim; ++i) {
215 stag->firstRank[i] = PetscNot(stag->rank[i]);
216 stag->lastRank[i] = (PetscBool)(stag->rank[i] == stag->nRanks[i] - 1);
217 }
218
219 /* Determine Locally owned region
220
221 Divide equally, giving lower ranks in each dimension and extra element if needbe.
222
223 Note that this uses O(P) storage. If this ever becomes an issue, this could
224 be refactored to not keep this data around. */
225 for (i = 0; i < dim; ++i) {
226 if (!stag->l[i]) {
227 const PetscInt Ni = stag->N[i], nRanksi = stag->nRanks[i];
228 PetscCall(PetscMalloc1(stag->nRanks[i], &stag->l[i]));
229 for (j = 0; j < stag->nRanks[i]; ++j) stag->l[i][j] = Ni / nRanksi + ((Ni % nRanksi) > j);
230 }
231 }
232
233 /* Retrieve local size in stag->n */
234 for (i = 0; i < dim; ++i) stag->n[i] = stag->l[i][stag->rank[i]];
235 if (PetscDefined(USE_DEBUG)) {
236 for (i = 0; i < dim; ++i) {
237 PetscInt Ncheck, j;
238 Ncheck = 0;
239 for (j = 0; j < stag->nRanks[i]; ++j) Ncheck += stag->l[i][j];
240 PetscCheck(Ncheck == stag->N[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local sizes in dimension %" PetscInt_FMT " don't add up. %" PetscInt_FMT " != %" PetscInt_FMT, i, Ncheck, stag->N[i]);
241 }
242 }
243
244 /* Compute starting elements */
245 for (i = 0; i < dim; ++i) {
246 stag->start[i] = 0;
247 for (j = 0; j < stag->rank[i]; ++j) stag->start[i] += stag->l[i][j];
248 }
249
250 /* Determine ranks of neighbors, using DMDA's convention
251
252 n6 n7 n8
253 n3 n5
254 n0 n1 n2 */
255 PetscCall(DMStagSetUpBuildNeighbors_2d(dm));
256
257 /* Determine whether the ghost region includes dummies or not. This is currently
258 equivalent to having a non-periodic boundary. If not, then
259 ghostOffset{Start,End}[d] elements correspond to elements on the neighbor.
260 If true, then
261 - at the start, there are ghostOffsetStart[d] ghost elements
262 - at the end, there is a layer of extra "physical" points inside a layer of
263 ghostOffsetEnd[d] ghost elements
264 Note that this computation should be updated if any boundary types besides
265 NONE, GHOSTED, and PERIODIC are supported. */
266 for (d = 0; d < 2; ++d) dummyStart[d] = (PetscBool)(stag->firstRank[d] && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);
267 for (d = 0; d < 2; ++d) dummyEnd[d] = (PetscBool)(stag->lastRank[d] && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);
268
269 /* Define useful sizes */
270 stag->entriesPerElement = stag->dof[0] + 2 * stag->dof[1] + stag->dof[2];
271 entriesPerFace = stag->dof[0] + stag->dof[1];
272 entriesPerCorner = stag->dof[0];
273 entriesPerElementRow = stag->n[0] * stag->entriesPerElement + (dummyEnd[0] ? entriesPerFace : 0);
274 stag->entries = stag->n[1] * entriesPerElementRow + (dummyEnd[1] ? stag->n[0] * entriesPerFace : 0) + (dummyEnd[0] && dummyEnd[1] ? entriesPerCorner : 0);
275
276 /* Compute offsets for each rank into global vectors
277 This again requires O(P) storage, which could be replaced with some global
278 communication. */
279 PetscCall(DMStagSetUpBuildGlobalOffsets_2d(dm, &globalOffsets));
280
281 for (d = 0; d < dim; ++d)
282 PetscCheck(stag->boundaryType[d] == DM_BOUNDARY_NONE || stag->boundaryType[d] == DM_BOUNDARY_PERIODIC || stag->boundaryType[d] == DM_BOUNDARY_GHOSTED, PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Unsupported boundary type");
283
284 /* Define ghosted/local sizes */
285 PetscCheck(stag->stencilType == DMSTAG_STENCIL_NONE || !(stag->n[0] < stag->stencilWidth || stag->n[1] < stag->stencilWidth), PETSC_COMM_SELF, PETSC_ERR_SUP, "DMStag 2d setup does not support local sizes (%" PetscInt_FMT " x %" PetscInt_FMT ") smaller than the elementwise stencil width (%" PetscInt_FMT ")",
286 stag->n[0], stag->n[1], stag->stencilWidth);
287 for (d = 0; d < dim; ++d) {
288 switch (stag->boundaryType[d]) {
289 case DM_BOUNDARY_NONE:
290 /* Note: for a elements-only DMStag, the extra elements on the faces aren't necessary but we include them anyway */
291 switch (stag->stencilType) {
292 case DMSTAG_STENCIL_NONE: /* only the extra one on the right/top faces */
293 stag->nGhost[d] = stag->n[d];
294 stag->startGhost[d] = stag->start[d];
295 if (stag->lastRank[d]) stag->nGhost[d] += 1;
296 break;
297 case DMSTAG_STENCIL_STAR: /* allocate the corners but don't use them */
298 case DMSTAG_STENCIL_BOX:
299 stag->nGhost[d] = stag->n[d];
300 stag->startGhost[d] = stag->start[d];
301 if (!stag->firstRank[d]) {
302 stag->nGhost[d] += stag->stencilWidth; /* add interior ghost elements */
303 stag->startGhost[d] -= stag->stencilWidth;
304 }
305 if (!stag->lastRank[d]) {
306 stag->nGhost[d] += stag->stencilWidth; /* add interior ghost elements */
307 } else {
308 stag->nGhost[d] += 1; /* one element on the boundary to complete blocking */
309 }
310 break;
311 default:
312 SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Unrecognized ghost stencil type %d", stag->stencilType);
313 }
314 break;
315 case DM_BOUNDARY_GHOSTED:
316 switch (stag->stencilType) {
317 case DMSTAG_STENCIL_NONE:
318 stag->startGhost[d] = stag->start[d];
319 stag->nGhost[d] = stag->n[d] + (stag->lastRank[d] ? 1 : 0);
320 break;
321 case DMSTAG_STENCIL_STAR:
322 case DMSTAG_STENCIL_BOX:
323 stag->startGhost[d] = stag->start[d] - stag->stencilWidth; /* This value may be negative */
324 stag->nGhost[d] = stag->n[d] + 2 * stag->stencilWidth + (stag->lastRank[d] && stag->stencilWidth == 0 ? 1 : 0);
325 break;
326 default:
327 SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Unrecognized ghost stencil type %d", stag->stencilType);
328 }
329 break;
330 case DM_BOUNDARY_PERIODIC:
331 switch (stag->stencilType) {
332 case DMSTAG_STENCIL_NONE: /* only the extra one on the right/top faces */
333 stag->nGhost[d] = stag->n[d];
334 stag->startGhost[d] = stag->start[d];
335 break;
336 case DMSTAG_STENCIL_STAR:
337 case DMSTAG_STENCIL_BOX:
338 stag->nGhost[d] = stag->n[d] + 2 * stag->stencilWidth;
339 stag->startGhost[d] = stag->start[d] - stag->stencilWidth;
340 break;
341 default:
342 SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Unrecognized ghost stencil type %d", stag->stencilType);
343 }
344 break;
345 default:
346 SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Unsupported boundary type in dimension %" PetscInt_FMT, d);
347 }
348 }
349 stag->entriesGhost = stag->nGhost[0] * stag->nGhost[1] * stag->entriesPerElement;
350 entriesPerElementRowGhost = stag->nGhost[0] * stag->entriesPerElement;
351
352 /* Create global-->local VecScatter and local->global ISLocalToGlobalMapping
353
354 We iterate over all local points twice. First, we iterate over each neighbor, populating
355 1. idxLocal[] : the subset of points, in local numbering ("S" from 0 on all points including ghosts), which correspond to global points. That is, the set of all non-dummy points in the ghosted representation
356 2. idxGlobal[]: the corresponding global points, in global numbering (Nested "S"s - ranks then non-ghost points in each rank)
357
358 Next, we iterate over all points in the local ordering, populating
359 3. idxGlobalAll[] : entry i is the global point corresponding to local point i, or -1 if local point i is a dummy.
360
361 Note further here that the local/ghosted vectors:
362 - Are always an integral number of elements-worth of points, in all directions.
363 - Contain three flavors of points:
364 1. Points which "live here" in the global representation
365 2. Ghost points which correspond to points on other ranks in the global representation
366 3. Ghost points, which we call "dummy points," which do not correspond to any point in the global representation
367
368 Dummy ghost points arise in at least three ways:
369 1. As padding for the right, top, and front physical boundaries, to complete partial elements
370 2. As unused space in the "corners" on interior ranks when using a star stencil
371 3. As additional work space on all physical boundaries, when DM_BOUNDARY_GHOSTED is used
372
373 Note that, because of the boundary dummies,
374 with a stencil width of zero, on 1 rank, local and global vectors
375 are still different!
376
377 We assume that the size on each rank is greater than or equal to the
378 stencil width.
379 */
380
381 /* Check stencil type */
382 PetscCheck(stag->stencilType == DMSTAG_STENCIL_NONE || stag->stencilType == DMSTAG_STENCIL_BOX || stag->stencilType == DMSTAG_STENCIL_STAR, PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Unsupported stencil type %s", DMStagStencilTypes[stag->stencilType]);
383 star = (PetscBool)(stag->stencilType == DMSTAG_STENCIL_STAR || stag->stencilType == DMSTAG_STENCIL_NONE);
384
385 {
386 PetscInt *idxLocal, *idxGlobal, *idxGlobalAll;
387 PetscInt count, countAll, entriesToTransferTotal, i, j, d, ghostOffsetStart[2], ghostOffsetEnd[2];
388 IS isLocal, isGlobal;
389 PetscInt jghost, ighost;
390 PetscInt nNeighbors[9][2];
391 PetscBool nextToDummyEnd[2];
392
393 /* Compute numbers of elements on each neighbor */
394 for (i = 0; i < 9; ++i) {
395 const PetscInt neighborRank = stag->neighbors[i];
396 if (neighborRank >= 0) { /* note we copy the values for our own rank (neighbor 4) */
397 nNeighbors[i][0] = stag->l[0][neighborRank % stag->nRanks[0]];
398 nNeighbors[i][1] = stag->l[1][neighborRank / stag->nRanks[0]];
399 } else {
400 nNeighbors[i][0] = 0;
401 nNeighbors[i][1] = 0;
402 }
403 }
404
405 /* These offsets should always be non-negative, and describe how many
406 ghost elements exist at each boundary. These are not always equal to the stencil width,
407 because we may have different numbers of ghost elements at the boundaries. In particular,
408 we always have at least one ghost (dummy) element at the right/top/front. */
409 for (d = 0; d < 2; ++d) ghostOffsetStart[d] = stag->start[d] - stag->startGhost[d];
410 for (d = 0; d < 2; ++d) ghostOffsetEnd[d] = stag->startGhost[d] + stag->nGhost[d] - (stag->start[d] + stag->n[d]);
411
412 /* Compute whether the next rank has an extra point (only used in x direction) */
413 for (d = 0; d < 2; ++d) nextToDummyEnd[d] = (PetscBool)(stag->boundaryType[d] != DM_BOUNDARY_PERIODIC && stag->rank[d] == stag->nRanks[d] - 2);
414
415 /* Compute the number of local entries which correspond to any global entry */
416 {
417 PetscInt nNonDummyGhost[2];
418 for (d = 0; d < 2; ++d) nNonDummyGhost[d] = stag->nGhost[d] - (dummyStart[d] ? ghostOffsetStart[d] : 0) - (dummyEnd[d] ? ghostOffsetEnd[d] : 0);
419 if (star) {
420 entriesToTransferTotal = (nNonDummyGhost[0] * stag->n[1] + stag->n[0] * nNonDummyGhost[1] - stag->n[0] * stag->n[1]) * stag->entriesPerElement + (dummyEnd[0] ? nNonDummyGhost[1] * entriesPerFace : 0) + (dummyEnd[1] ? nNonDummyGhost[0] * entriesPerFace : 0) + (dummyEnd[0] && dummyEnd[1] ? entriesPerCorner : 0);
421 } else {
422 entriesToTransferTotal = nNonDummyGhost[0] * nNonDummyGhost[1] * stag->entriesPerElement + (dummyEnd[0] ? nNonDummyGhost[1] * entriesPerFace : 0) + (dummyEnd[1] ? nNonDummyGhost[0] * entriesPerFace : 0) + (dummyEnd[0] && dummyEnd[1] ? entriesPerCorner : 0);
423 }
424 }
425
426 /* Allocate arrays to populate */
427 PetscCall(PetscMalloc1(entriesToTransferTotal, &idxLocal));
428 PetscCall(PetscMalloc1(entriesToTransferTotal, &idxGlobal));
429
430 /* Counts into idxLocal/idxGlobal */
431 count = 0;
432
433 /* Here and below, we work with (i,j) describing element numbers within a neighboring rank's global ordering,
434 to be offset by that rank's global offset,
435 and (ighost,jghost) referring to element numbers within this ranks local (ghosted) ordering */
436
437 /* Neighbor 0 (down left) */
438 if (!star && !dummyStart[0] && !dummyStart[1]) {
439 const PetscInt neighbor = 0;
440 const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
441 const PetscInt *const nNeighbor = nNeighbors[neighbor];
442 const PetscInt entriesPerElementRowNeighbor = stag->entriesPerElement * nNeighbor[0];
443 for (jghost = 0; jghost < ghostOffsetStart[1]; ++jghost) {
444 const PetscInt j = nNeighbor[1] - ghostOffsetStart[1] + jghost;
445 for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
446 const PetscInt i = nNeighbor[0] - ghostOffsetStart[0] + ighost;
447 for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
448 idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
449 idxLocal[count] = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
450 }
451 }
452 }
453 }
454
455 /* Neighbor 1 (down) */
456 if (!dummyStart[1]) {
457 /* We may be a ghosted boundary in x, in which case the neighbor is also */
458 const PetscInt neighbor = 1;
459 const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
460 const PetscInt *const nNeighbor = nNeighbors[neighbor];
461 const PetscInt entriesPerElementRowNeighbor = entriesPerElementRow; /* same as here */
462 for (jghost = 0; jghost < ghostOffsetStart[1]; ++jghost) {
463 const PetscInt j = nNeighbor[1] - ghostOffsetStart[1] + jghost;
464 for (ighost = ghostOffsetStart[0]; ighost < stag->nGhost[0] - ghostOffsetEnd[0]; ++ighost) {
465 const PetscInt i = ighost - ghostOffsetStart[0];
466 for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
467 idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
468 idxLocal[count] = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
469 }
470 }
471 if (dummyEnd[0]) {
472 const PetscInt ighost = stag->nGhost[0] - ghostOffsetEnd[0];
473 const PetscInt i = stag->n[0];
474 for (d = 0; d < stag->dof[0]; ++d, ++count) { /* Vertex */
475 idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
476 idxLocal[count] = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
477 }
478 for (d = 0; d < stag->dof[1]; ++d, ++count) { /* Face */
479 idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + stag->dof[0] + d;
480 idxLocal[count] = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + stag->dof[0] + stag->dof[1] + d;
481 }
482 }
483 }
484 }
485
486 /* Neighbor 2 (down right) */
487 if (!star && !dummyEnd[0] && !dummyStart[1]) {
488 const PetscInt neighbor = 2;
489 const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
490 const PetscInt *const nNeighbor = nNeighbors[neighbor];
491 const PetscInt entriesPerElementRowNeighbor = nNeighbor[0] * stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerFace : 0);
492 for (jghost = 0; jghost < ghostOffsetStart[1]; ++jghost) {
493 const PetscInt j = nNeighbor[1] - ghostOffsetStart[1] + jghost;
494 for (i = 0; i < ghostOffsetEnd[0]; ++i) {
495 const PetscInt ighost = stag->nGhost[0] - ghostOffsetEnd[0] + i;
496 for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
497 idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
498 idxLocal[count] = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
499 }
500 }
501 }
502 }
503
504 /* Neighbor 3 (left) */
505 if (!dummyStart[0]) {
506 /* Our neighbor is never a ghosted boundary in x, but we may be
507 Here, we may be a ghosted boundary in y and thus so will our neighbor be */
508 const PetscInt neighbor = 3;
509 const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
510 const PetscInt *const nNeighbor = nNeighbors[neighbor];
511 const PetscInt entriesPerElementRowNeighbor = nNeighbor[0] * stag->entriesPerElement;
512 for (jghost = ghostOffsetStart[1]; jghost < stag->nGhost[1] - ghostOffsetEnd[1]; ++jghost) {
513 const PetscInt j = jghost - ghostOffsetStart[1];
514 for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
515 const PetscInt i = nNeighbor[0] - ghostOffsetStart[0] + ighost;
516 for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
517 idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
518 idxLocal[count] = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
519 }
520 }
521 }
522 if (dummyEnd[1]) {
523 const PetscInt jghost = stag->nGhost[1] - ghostOffsetEnd[1];
524 const PetscInt j = stag->n[1];
525 for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
526 const PetscInt i = nNeighbor[0] - ghostOffsetStart[0] + ighost;
527 for (d = 0; d < entriesPerFace; ++d, ++count) { /* only vertices and horizontal face (which are the first dof) */
528 idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * entriesPerFace + d; /* i moves by face here */
529 idxLocal[count] = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
530 }
531 }
532 }
533 }
534
535 /* Interior/Resident-here-in-global elements ("Neighbor 4" - same rank)
536 *including* entries from boundary dummy elements */
537 {
538 const PetscInt neighbor = 4;
539 const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
540 for (j = 0; j < stag->n[1]; ++j) {
541 const PetscInt jghost = j + ghostOffsetStart[1];
542 for (i = 0; i < stag->n[0]; ++i) {
543 const PetscInt ighost = i + ghostOffsetStart[0];
544 for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
545 idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * stag->entriesPerElement + d;
546 idxLocal[count] = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
547 }
548 }
549 if (dummyEnd[0]) {
550 const PetscInt ighost = i + ghostOffsetStart[0];
551 i = stag->n[0];
552 for (d = 0; d < stag->dof[0]; ++d, ++count) { /* vertex first */
553 idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * stag->entriesPerElement + d;
554 idxLocal[count] = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
555 }
556 for (d = 0; d < stag->dof[1]; ++d, ++count) { /* then left edge (skipping bottom face) */
557 idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * stag->entriesPerElement + stag->dof[0] + d;
558 idxLocal[count] = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + stag->dof[0] + stag->dof[1] + d;
559 }
560 }
561 }
562 if (dummyEnd[1]) {
563 const PetscInt jghost = j + ghostOffsetStart[1];
564 j = stag->n[1];
565 for (i = 0; i < stag->n[0]; ++i) {
566 const PetscInt ighost = i + ghostOffsetStart[0];
567 for (d = 0; d < entriesPerFace; ++d, ++count) { /* vertex and bottom face (which are the first entries) */
568 idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * entriesPerFace + d; /* note i increment by entriesPerFace */
569 idxLocal[count] = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
570 }
571 }
572 if (dummyEnd[0]) {
573 const PetscInt ighost = i + ghostOffsetStart[0];
574 i = stag->n[0];
575 for (d = 0; d < entriesPerCorner; ++d, ++count) { /* vertex only */
576 idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * entriesPerFace + d; /* note i increment by entriesPerFace */
577 idxLocal[count] = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
578 }
579 }
580 }
581 }
582
583 /* Neighbor 5 (right) */
584 if (!dummyEnd[0]) {
585 /* We can never be right boundary, but we may be a top boundary, along with the right neighbor */
586 const PetscInt neighbor = 5;
587 const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
588 const PetscInt *const nNeighbor = nNeighbors[neighbor];
589 const PetscInt entriesPerElementRowNeighbor = nNeighbor[0] * stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerFace : 0);
590 for (jghost = ghostOffsetStart[1]; jghost < stag->nGhost[1] - ghostOffsetEnd[1]; ++jghost) {
591 const PetscInt j = jghost - ghostOffsetStart[1];
592 for (i = 0; i < ghostOffsetEnd[0]; ++i) {
593 const PetscInt ighost = stag->nGhost[0] - ghostOffsetEnd[0] + i;
594 for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
595 idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
596 idxLocal[count] = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
597 }
598 }
599 }
600 if (dummyEnd[1]) {
601 const PetscInt jghost = stag->nGhost[1] - ghostOffsetEnd[1];
602 const PetscInt j = nNeighbor[1];
603 for (i = 0; i < ghostOffsetEnd[0]; ++i) {
604 const PetscInt ighost = stag->nGhost[0] - ghostOffsetEnd[0] + i;
605 for (d = 0; d < entriesPerFace; ++d, ++count) { /* only vertices and horizontal face (which are the first dof) */
606 idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * entriesPerFace + d; /* Note i increment by entriesPerFace */
607 idxLocal[count] = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
608 }
609 }
610 }
611 }
612
613 /* Neighbor 6 (up left) */
614 if (!star && !dummyStart[0] && !dummyEnd[1]) {
615 /* We can never be a top boundary, but our neighbor may be
616 We may be a right boundary, but our neighbor cannot be */
617 const PetscInt neighbor = 6;
618 const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
619 const PetscInt *const nNeighbor = nNeighbors[neighbor];
620 const PetscInt entriesPerElementRowNeighbor = nNeighbor[0] * stag->entriesPerElement;
621 for (j = 0; j < ghostOffsetEnd[1]; ++j) {
622 const PetscInt jghost = stag->nGhost[1] - ghostOffsetEnd[1] + j;
623 for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
624 const PetscInt i = nNeighbor[0] - ghostOffsetStart[0] + ighost;
625 for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
626 idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
627 idxLocal[count] = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
628 }
629 }
630 }
631 }
632
633 /* Neighbor 7 (up) */
634 if (!dummyEnd[1]) {
635 /* We cannot be the last rank in y, though our neighbor may be
636 We may be the last rank in x, in which case our neighbor is also */
637 const PetscInt neighbor = 7;
638 const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
639 const PetscInt *const nNeighbor = nNeighbors[neighbor];
640 const PetscInt entriesPerElementRowNeighbor = entriesPerElementRow; /* same as here */
641 for (j = 0; j < ghostOffsetEnd[1]; ++j) {
642 const PetscInt jghost = stag->nGhost[1] - ghostOffsetEnd[1] + j;
643 for (ighost = ghostOffsetStart[0]; ighost < stag->nGhost[0] - ghostOffsetEnd[0]; ++ighost) {
644 const PetscInt i = ighost - ghostOffsetStart[0];
645 for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
646 idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
647 idxLocal[count] = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
648 }
649 }
650 if (dummyEnd[0]) {
651 const PetscInt ighost = stag->nGhost[0] - ghostOffsetEnd[0];
652 const PetscInt i = nNeighbor[0];
653 for (d = 0; d < stag->dof[0]; ++d, ++count) { /* Vertex */
654 idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
655 idxLocal[count] = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
656 }
657 for (d = 0; d < stag->dof[1]; ++d, ++count) { /* Face */
658 idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + stag->dof[0] + d;
659 idxLocal[count] = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + stag->dof[0] + stag->dof[1] + d;
660 }
661 }
662 }
663 }
664
665 /* Neighbor 8 (up right) */
666 if (!star && !dummyEnd[0] && !dummyEnd[1]) {
667 /* We can never be a ghosted boundary
668 Our neighbor may be a top boundary, a right boundary, or both */
669 const PetscInt neighbor = 8;
670 const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
671 const PetscInt *const nNeighbor = nNeighbors[neighbor];
672 const PetscInt entriesPerElementRowNeighbor = nNeighbor[0] * stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerFace : 0);
673 for (j = 0; j < ghostOffsetEnd[1]; ++j) {
674 const PetscInt jghost = stag->nGhost[1] - ghostOffsetEnd[1] + j;
675 for (i = 0; i < ghostOffsetEnd[0]; ++i) {
676 const PetscInt ighost = stag->nGhost[0] - ghostOffsetEnd[0] + i;
677 for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
678 idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
679 idxLocal[count] = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
680 }
681 }
682 }
683 }
684
685 PetscCheck(count == entriesToTransferTotal, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of entries computed in gtol (%" PetscInt_FMT ") is not as expected (%" PetscInt_FMT ")", count, entriesToTransferTotal);
686
687 /* Create Local and Global ISs (transferring pointer ownership) */
688 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)dm), entriesToTransferTotal, idxLocal, PETSC_OWN_POINTER, &isLocal));
689 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)dm), entriesToTransferTotal, idxGlobal, PETSC_OWN_POINTER, &isGlobal));
690
691 /* Create stag->gtol. The order is computed as PETSc ordering, and doesn't include dummy entries */
692 {
693 Vec local, global;
694 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)dm), 1, stag->entries, PETSC_DECIDE, NULL, &global));
695 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, PetscMax(stag->entriesPerElement, 1), stag->entriesGhost, NULL, &local));
696 PetscCall(VecScatterCreate(global, isGlobal, local, isLocal, &stag->gtol));
697 PetscCall(VecDestroy(&global));
698 PetscCall(VecDestroy(&local));
699 }
700
701 /* Destroy ISs */
702 PetscCall(ISDestroy(&isLocal));
703 PetscCall(ISDestroy(&isGlobal));
704
705 /* Next, we iterate over the local entries again, in local order, recording the global entry to which each maps,
706 or -1 if there is none */
707 PetscCall(PetscMalloc1(stag->entriesGhost, &idxGlobalAll));
708
709 countAll = 0;
710
711 /* Loop over rows 1/3 : down */
712 if (!dummyStart[1]) {
713 for (jghost = 0; jghost < ghostOffsetStart[1]; ++jghost) {
714 /* Loop over columns 1/3 : down left */
715 if (!star && !dummyStart[0]) {
716 const PetscInt neighbor = 0;
717 const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
718 const PetscInt *const nNeighbor = nNeighbors[neighbor];
719 const PetscInt j = nNeighbor[1] - ghostOffsetStart[1] + jghost; /* Note: this is actually the same value for the whole row of ranks below, so recomputing it for the next two ranks is redundant, and one could even get rid of jghost entirely if desired */
720 const PetscInt eprNeighbor = nNeighbor[0] * stag->entriesPerElement;
721 for (i = nNeighbor[0] - ghostOffsetStart[0]; i < nNeighbor[0]; ++i) {
722 for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
723 }
724 } else {
725 /* Down Left dummies */
726 for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
727 for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
728 }
729 }
730
731 /* Loop over columns 2/3 : down middle */
732 {
733 const PetscInt neighbor = 1;
734 const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
735 const PetscInt *const nNeighbor = nNeighbors[neighbor];
736 const PetscInt j = nNeighbor[1] - ghostOffsetStart[1] + jghost;
737 const PetscInt eprNeighbor = entriesPerElementRow; /* same as here */
738 for (i = 0; i < nNeighbor[0]; ++i) {
739 for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
740 }
741 }
742
743 /* Loop over columns 3/3 : down right */
744 if (!star && !dummyEnd[0]) {
745 const PetscInt neighbor = 2;
746 const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
747 const PetscInt *const nNeighbor = nNeighbors[neighbor];
748 const PetscInt j = nNeighbor[1] - ghostOffsetStart[1] + jghost;
749 const PetscInt eprNeighbor = nNeighbor[0] * stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerFace : 0);
750 for (i = 0; i < ghostOffsetEnd[0]; ++i) {
751 for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
752 }
753 } else if (dummyEnd[0]) {
754 /* Down right partial dummy elements, living on the *down* rank */
755 const PetscInt neighbor = 1;
756 const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
757 const PetscInt *const nNeighbor = nNeighbors[neighbor];
758 const PetscInt j = nNeighbor[1] - ghostOffsetStart[1] + jghost;
759 const PetscInt eprNeighbor = entriesPerElementRow; /* same as here */
760 PetscInt dGlobal;
761 i = nNeighbor[0];
762 for (d = 0, dGlobal = 0; d < stag->dof[0]; ++d, ++dGlobal, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + dGlobal;
763 for (; d < stag->dof[0] + stag->dof[1]; ++d, ++countAll) idxGlobalAll[countAll] = -1; /* dummy down face point */
764 for (; d < stag->dof[0] + 2 * stag->dof[1]; ++d, ++dGlobal, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + dGlobal;
765 for (; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1; /* dummy element point */
766 ++i;
767 for (; i < nNeighbor[0] + ghostOffsetEnd[0]; ++i) {
768 for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
769 }
770 } else {
771 /* Down Right dummies */
772 for (ighost = 0; ighost < ghostOffsetEnd[0]; ++ighost) {
773 for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
774 }
775 }
776 }
777 } else {
778 /* Down dummies row */
779 for (jghost = 0; jghost < ghostOffsetStart[1]; ++jghost) {
780 for (ighost = 0; ighost < stag->nGhost[0]; ++ighost) {
781 for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
782 }
783 }
784 }
785
786 /* Loop over rows 2/3 : center */
787 for (j = 0; j < stag->n[1]; ++j) {
788 /* Loop over columns 1/3 : left */
789 if (!dummyStart[0]) {
790 const PetscInt neighbor = 3;
791 const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
792 const PetscInt *const nNeighbor = nNeighbors[neighbor];
793 const PetscInt eprNeighbor = nNeighbor[0] * stag->entriesPerElement;
794 for (i = nNeighbor[0] - ghostOffsetStart[0]; i < nNeighbor[0]; ++i) {
795 for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
796 }
797 } else {
798 /* (Middle) Left dummies */
799 for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
800 for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
801 }
802 }
803
804 /* Loop over columns 2/3 : here (the "neighbor" is ourselves, here) */
805 {
806 const PetscInt neighbor = 4;
807 const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
808 const PetscInt eprNeighbor = entriesPerElementRow; /* same as here (obviously) */
809 for (i = 0; i < stag->n[0]; ++i) {
810 for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
811 }
812 }
813
814 /* Loop over columns 3/3 : right */
815 if (!dummyEnd[0]) {
816 const PetscInt neighbor = 5;
817 const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
818 const PetscInt *const nNeighbor = nNeighbors[neighbor];
819 const PetscInt eprNeighbor = nNeighbor[0] * stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerFace : 0);
820 for (i = 0; i < ghostOffsetEnd[0]; ++i) {
821 for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
822 }
823 } else {
824 /* -1's for right layer of partial dummies, living on *this* rank */
825 const PetscInt neighbor = 4;
826 const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
827 const PetscInt *const nNeighbor = nNeighbors[neighbor];
828 const PetscInt eprNeighbor = entriesPerElementRow; /* same as here (obviously) */
829 PetscInt dGlobal;
830 i = nNeighbor[0];
831 for (d = 0, dGlobal = 0; d < stag->dof[0]; ++d, ++dGlobal, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + dGlobal;
832 for (; d < stag->dof[0] + stag->dof[1]; ++d, ++countAll) idxGlobalAll[countAll] = -1; /* dummy down face point */
833 for (; d < stag->dof[0] + 2 * stag->dof[1]; ++d, ++dGlobal, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + dGlobal;
834 for (; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1; /* dummy element point */
835 ++i;
836 for (; i < nNeighbor[0] + ghostOffsetEnd[0]; ++i) {
837 for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
838 }
839 }
840 }
841
842 /* Loop over rows 3/3 : up */
843 if (!dummyEnd[1]) {
844 for (j = 0; j < ghostOffsetEnd[1]; ++j) {
845 /* Loop over columns 1/3 : up left */
846 if (!star && !dummyStart[0]) {
847 const PetscInt neighbor = 6;
848 const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
849 const PetscInt *const nNeighbor = nNeighbors[neighbor];
850 const PetscInt eprNeighbor = nNeighbor[0] * stag->entriesPerElement;
851 for (i = nNeighbor[0] - ghostOffsetStart[0]; i < nNeighbor[0]; ++i) {
852 for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
853 }
854 } else {
855 /* Up Left dummies */
856 for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
857 for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
858 }
859 }
860
861 /* Loop over columns 2/3 : up */
862 {
863 const PetscInt neighbor = 7;
864 const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
865 const PetscInt *const nNeighbor = nNeighbors[neighbor];
866 const PetscInt eprNeighbor = entriesPerElementRow; /* Same as here */
867 for (i = 0; i < nNeighbor[0]; ++i) {
868 for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
869 }
870 }
871
872 /* Loop over columns 3/3 : up right */
873 if (!star && !dummyEnd[0]) {
874 const PetscInt neighbor = 8;
875 const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
876 const PetscInt *const nNeighbor = nNeighbors[neighbor];
877 const PetscInt eprNeighbor = nNeighbor[0] * stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerFace : 0);
878 for (i = 0; i < ghostOffsetEnd[0]; ++i) {
879 for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
880 }
881 } else if (dummyEnd[0]) {
882 /* -1's for right layer of partial dummies, living on rank above */
883 const PetscInt neighbor = 7;
884 const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
885 const PetscInt *const nNeighbor = nNeighbors[neighbor];
886 const PetscInt eprNeighbor = entriesPerElementRow; /* Same as here */
887 PetscInt dGlobal;
888 i = nNeighbor[0];
889 for (d = 0, dGlobal = 0; d < stag->dof[0]; ++d, ++dGlobal, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + dGlobal;
890 for (; d < stag->dof[0] + stag->dof[1]; ++d, ++countAll) idxGlobalAll[countAll] = -1; /* dummy down face point */
891 for (; d < stag->dof[0] + 2 * stag->dof[1]; ++d, ++dGlobal, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + dGlobal;
892 for (; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1; /* dummy element point */
893 ++i;
894 for (; i < nNeighbor[0] + ghostOffsetEnd[0]; ++i) {
895 for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
896 }
897 } else {
898 /* Up Right dummies */
899 for (ighost = 0; ighost < ghostOffsetEnd[0]; ++ighost) {
900 for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
901 }
902 }
903 }
904 } else {
905 j = stag->n[1];
906 /* Top layer of partial dummies */
907
908 /* up left partial dummies layer : Loop over columns 1/3 : living on *left* neighbor */
909 if (!dummyStart[0]) {
910 const PetscInt neighbor = 3;
911 const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
912 const PetscInt *const nNeighbor = nNeighbors[neighbor];
913 const PetscInt eprNeighbor = nNeighbor[0] * stag->entriesPerElement;
914 for (i = nNeighbor[0] - ghostOffsetStart[0]; i < nNeighbor[0]; ++i) {
915 for (d = 0; d < stag->dof[0] + stag->dof[1]; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * entriesPerFace + d; /* Note entriesPerFace here */
916 for (; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1; /* dummy left face and element points */
917 }
918 } else {
919 for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
920 for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
921 }
922 }
923
924 /* up partial dummies layer : Loop over columns 2/3 : living on *this* rank */
925 {
926 const PetscInt neighbor = 4;
927 const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
928 const PetscInt eprNeighbor = entriesPerElementRow; /* same as here (obviously) */
929 for (i = 0; i < stag->n[0]; ++i) {
930 for (d = 0; d < stag->dof[0] + stag->dof[1]; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * entriesPerFace + d; /* Note entriesPerFace here */
931 for (; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1; /* dummy left face and element points */
932 }
933 }
934
935 if (!dummyEnd[0]) {
936 /* up right partial dummies layer : Loop over columns 3/3 : living on *right* neighbor */
937 const PetscInt neighbor = 5;
938 const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
939 const PetscInt *const nNeighbor = nNeighbors[neighbor];
940 const PetscInt eprNeighbor = nNeighbor[0] * stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerFace : 0);
941 for (i = 0; i < ghostOffsetEnd[0]; ++i) {
942 for (d = 0; d < stag->dof[0] + stag->dof[1]; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * entriesPerFace + d; /* Note entriesPerFace here */
943 for (; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1; /* dummy left face and element points */
944 }
945 } else {
946 /* Top partial dummies layer : Loop over columns 3/3 : right, living *here* */
947 const PetscInt neighbor = 4;
948 const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
949 const PetscInt eprNeighbor = entriesPerElementRow; /* same as here (obviously) */
950 i = stag->n[0];
951 for (d = 0; d < stag->dof[0]; ++d, ++countAll) { /* Note just the vertex here */
952 idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * entriesPerFace + d; /* Note entriesPerFace here */
953 }
954 for (; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1; /* dummy bottom face, left face and element points */
955 ++i;
956 for (; i < stag->n[0] + ghostOffsetEnd[0]; ++i) {
957 for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
958 }
959 }
960 ++j;
961 /* Additional top dummy layers */
962 for (; j < stag->n[1] + ghostOffsetEnd[1]; ++j) {
963 for (ighost = 0; ighost < stag->nGhost[0]; ++ighost) {
964 for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
965 }
966 }
967 }
968
969 /* Create local-to-global map (in local ordering, includes maps to -1 for dummy points) */
970 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, stag->entriesGhost, idxGlobalAll, PETSC_OWN_POINTER, &dm->ltogmap));
971 }
972
973 /* In special cases, create a dedicated injective local-to-global map */
974 if ((stag->boundaryType[0] == DM_BOUNDARY_PERIODIC && stag->nRanks[0] == 1) || (stag->boundaryType[1] == DM_BOUNDARY_PERIODIC && stag->nRanks[1] == 1)) PetscCall(DMStagPopulateLocalToGlobalInjective(dm));
975
976 /* Free global offsets */
977 PetscCall(PetscFree(globalOffsets));
978
979 /* Precompute location offsets */
980 PetscCall(DMStagComputeLocationOffsets_2d(dm));
981
982 /* View from Options */
983 PetscCall(DMViewFromOptions(dm, NULL, "-dm_view"));
984 PetscFunctionReturn(PETSC_SUCCESS);
985 }
986
987 /* adapted from da2.c */
DMStagSetUpBuildRankGrid_2d(DM dm)988 static PetscErrorCode DMStagSetUpBuildRankGrid_2d(DM dm)
989 {
990 DM_Stag *const stag = (DM_Stag *)dm->data;
991 PetscMPIInt rank, size, m, n;
992 const PetscInt M = stag->N[0];
993 const PetscInt N = stag->N[1];
994
995 PetscFunctionBegin;
996 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size));
997 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
998 m = stag->nRanks[0];
999 n = stag->nRanks[1];
1000 if (m != PETSC_DECIDE) {
1001 PetscCheck(m >= 1, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Non-positive number of ranks in X direction: %d", m);
1002 PetscCheck(m <= size, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Too many ranks in X direction: %d %d", m, size);
1003 }
1004 if (n != PETSC_DECIDE) {
1005 PetscCheck(n >= 1, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Non-positive number of ranks in Y direction: %d", n);
1006 PetscCheck(n <= size, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Too many ranks in Y direction: %d %d", n, size);
1007 }
1008 if (m == PETSC_DECIDE || n == PETSC_DECIDE) {
1009 if (n != PETSC_DECIDE) {
1010 m = size / n;
1011 } else if (m != PETSC_DECIDE) {
1012 n = size / m;
1013 } else {
1014 /* try for squarish distribution */
1015 m = (PetscMPIInt)(0.5 + PetscSqrtReal(((PetscReal)M) * ((PetscReal)size) / ((PetscReal)N)));
1016 if (!m) m = 1;
1017 while (m > 0) {
1018 n = size / m;
1019 if (m * n == size) break;
1020 m--;
1021 }
1022 if (M > N && m < n) {
1023 PetscMPIInt _m = m;
1024 m = n;
1025 n = _m;
1026 }
1027 }
1028 PetscCheck(m * n == size, PetscObjectComm((PetscObject)dm), PETSC_ERR_PLIB, "Unable to create partition, check the size of the communicator and input m and n ");
1029 } else PetscCheck(m * n == size, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Given Bad partition. Product of sizes (%d) does not equal communicator size (%d)", m * n, size);
1030 PetscCheck(M >= m, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Partition in x direction is too fine! %" PetscInt_FMT " %d", M, m);
1031 PetscCheck(N >= n, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Partition in y direction is too fine! %" PetscInt_FMT " %d", N, n);
1032 stag->nRanks[0] = m;
1033 stag->nRanks[1] = n;
1034 PetscFunctionReturn(PETSC_SUCCESS);
1035 }
1036
DMStagSetUpBuildNeighbors_2d(DM dm)1037 static PetscErrorCode DMStagSetUpBuildNeighbors_2d(DM dm)
1038 {
1039 DM_Stag *const stag = (DM_Stag *)dm->data;
1040 PetscInt d, i;
1041 PetscBool per[2], first[2], last[2];
1042 PetscMPIInt neighborRank[9][2], r[2], n[2];
1043 const PetscInt dim = 2;
1044
1045 PetscFunctionBegin;
1046 for (d = 0; d < dim; ++d)
1047 PetscCheck(stag->boundaryType[d] == DM_BOUNDARY_NONE || stag->boundaryType[d] == DM_BOUNDARY_PERIODIC || stag->boundaryType[d] == DM_BOUNDARY_GHOSTED, PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Neighbor determination not implemented for %s",
1048 DMBoundaryTypes[stag->boundaryType[d]]);
1049
1050 /* Assemble some convenience variables */
1051 for (d = 0; d < dim; ++d) {
1052 per[d] = (PetscBool)(stag->boundaryType[d] == DM_BOUNDARY_PERIODIC);
1053 first[d] = stag->firstRank[d];
1054 last[d] = stag->lastRank[d];
1055 r[d] = stag->rank[d];
1056 n[d] = stag->nRanks[d];
1057 }
1058
1059 /* First, compute the position in the rank grid for all neighbors */
1060 neighborRank[0][0] = first[0] ? (per[0] ? n[0] - 1 : -1) : r[0] - 1; /* left down */
1061 neighborRank[0][1] = first[1] ? (per[1] ? n[1] - 1 : -1) : r[1] - 1;
1062
1063 neighborRank[1][0] = r[0]; /* down */
1064 neighborRank[1][1] = first[1] ? (per[1] ? n[1] - 1 : -1) : r[1] - 1;
1065
1066 neighborRank[2][0] = last[0] ? (per[0] ? 0 : -1) : r[0] + 1; /* right down */
1067 neighborRank[2][1] = first[1] ? (per[1] ? n[1] - 1 : -1) : r[1] - 1;
1068
1069 neighborRank[3][0] = first[0] ? (per[0] ? n[0] - 1 : -1) : r[0] - 1; /* left */
1070 neighborRank[3][1] = r[1];
1071
1072 neighborRank[4][0] = r[0];
1073 neighborRank[4][1] = r[1];
1074
1075 neighborRank[5][0] = last[0] ? (per[0] ? 0 : -1) : r[0] + 1; /* right */
1076 neighborRank[5][1] = r[1];
1077
1078 neighborRank[6][0] = first[0] ? (per[0] ? n[0] - 1 : -1) : r[0] - 1; /* left up */
1079 neighborRank[6][1] = last[1] ? (per[1] ? 0 : -1) : r[1] + 1;
1080
1081 neighborRank[7][0] = r[0]; /* up */
1082 neighborRank[7][1] = last[1] ? (per[1] ? 0 : -1) : r[1] + 1;
1083
1084 neighborRank[8][0] = last[0] ? (per[0] ? 0 : -1) : r[0] + 1; /* right up */
1085 neighborRank[8][1] = last[1] ? (per[1] ? 0 : -1) : r[1] + 1;
1086
1087 /* Then, compute the rank of each in the linear ordering */
1088 PetscCall(PetscMalloc1(9, &stag->neighbors));
1089 for (i = 0; i < 9; ++i) {
1090 if (neighborRank[i][0] >= 0 && neighborRank[i][1] >= 0) {
1091 stag->neighbors[i] = neighborRank[i][0] + n[0] * neighborRank[i][1];
1092 } else {
1093 stag->neighbors[i] = -1;
1094 }
1095 }
1096 PetscFunctionReturn(PETSC_SUCCESS);
1097 }
1098
DMStagSetUpBuildGlobalOffsets_2d(DM dm,PetscInt ** pGlobalOffsets)1099 static PetscErrorCode DMStagSetUpBuildGlobalOffsets_2d(DM dm, PetscInt **pGlobalOffsets)
1100 {
1101 const DM_Stag *const stag = (DM_Stag *)dm->data;
1102 PetscInt *globalOffsets;
1103 PetscInt i, j, d, entriesPerFace, count;
1104 PetscMPIInt size;
1105 PetscBool extra[2];
1106
1107 PetscFunctionBegin;
1108 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size));
1109 for (d = 0; d < 2; ++d) extra[d] = (PetscBool)(stag->boundaryType[d] != DM_BOUNDARY_PERIODIC); /* Extra points in global rep */
1110 entriesPerFace = stag->dof[0] + stag->dof[1];
1111 PetscCall(PetscMalloc1(size, pGlobalOffsets));
1112 globalOffsets = *pGlobalOffsets;
1113 globalOffsets[0] = 0;
1114 count = 1; /* note the count is offset by 1 here. We add the size of the previous rank */
1115 for (j = 0; j < stag->nRanks[1] - 1; ++j) {
1116 const PetscInt nnj = stag->l[1][j];
1117 for (i = 0; i < stag->nRanks[0] - 1; ++i) {
1118 const PetscInt nni = stag->l[0][i];
1119 globalOffsets[count] = globalOffsets[count - 1] + nnj * nni * stag->entriesPerElement; /* No right/top/front boundaries */
1120 ++count;
1121 }
1122 {
1123 /* i = stag->nRanks[0]-1; */
1124 const PetscInt nni = stag->l[0][i];
1125 globalOffsets[count] = globalOffsets[count - 1] + nnj * nni * stag->entriesPerElement + (extra[0] ? nnj * entriesPerFace : 0); /* Extra faces on the right */
1126 ++count;
1127 }
1128 }
1129 {
1130 /* j = stag->nRanks[1]-1; */
1131 const PetscInt nnj = stag->l[1][j];
1132 for (i = 0; i < stag->nRanks[0] - 1; ++i) {
1133 const PetscInt nni = stag->l[0][i];
1134 globalOffsets[count] = globalOffsets[count - 1] + nni * nnj * stag->entriesPerElement + (extra[1] ? nni * entriesPerFace : 0); /* Extra faces on the top */
1135 ++count;
1136 }
1137 /* Don't need to compute entries in last element */
1138 }
1139 PetscFunctionReturn(PETSC_SUCCESS);
1140 }
1141
DMStagComputeLocationOffsets_2d(DM dm)1142 static PetscErrorCode DMStagComputeLocationOffsets_2d(DM dm)
1143 {
1144 DM_Stag *const stag = (DM_Stag *)dm->data;
1145 const PetscInt epe = stag->entriesPerElement;
1146 const PetscInt epr = stag->nGhost[0] * epe;
1147
1148 PetscFunctionBegin;
1149 PetscCall(PetscMalloc1(DMSTAG_NUMBER_LOCATIONS, &stag->locationOffsets));
1150 stag->locationOffsets[DMSTAG_DOWN_LEFT] = 0;
1151 stag->locationOffsets[DMSTAG_DOWN] = stag->locationOffsets[DMSTAG_DOWN_LEFT] + stag->dof[0];
1152 stag->locationOffsets[DMSTAG_DOWN_RIGHT] = stag->locationOffsets[DMSTAG_DOWN_LEFT] + epe;
1153 stag->locationOffsets[DMSTAG_LEFT] = stag->locationOffsets[DMSTAG_DOWN] + stag->dof[1];
1154 stag->locationOffsets[DMSTAG_ELEMENT] = stag->locationOffsets[DMSTAG_LEFT] + stag->dof[1];
1155 stag->locationOffsets[DMSTAG_RIGHT] = stag->locationOffsets[DMSTAG_LEFT] + epe;
1156 stag->locationOffsets[DMSTAG_UP_LEFT] = stag->locationOffsets[DMSTAG_DOWN_LEFT] + epr;
1157 stag->locationOffsets[DMSTAG_UP] = stag->locationOffsets[DMSTAG_DOWN] + epr;
1158 stag->locationOffsets[DMSTAG_UP_RIGHT] = stag->locationOffsets[DMSTAG_UP_LEFT] + epe;
1159 PetscFunctionReturn(PETSC_SUCCESS);
1160 }
1161
DMStagPopulateLocalToGlobalInjective_2d(DM dm)1162 PETSC_INTERN PetscErrorCode DMStagPopulateLocalToGlobalInjective_2d(DM dm)
1163 {
1164 DM_Stag *const stag = (DM_Stag *)dm->data;
1165 PetscInt *idxLocal, *idxGlobal, *globalOffsetsRecomputed;
1166 const PetscInt *globalOffsets;
1167 PetscInt i, j, d, count, entriesPerCorner, entriesPerFace, entriesPerElementRowGhost, entriesPerElementRow, ghostOffsetStart[2];
1168 IS isLocal, isGlobal;
1169 PetscBool dummyEnd[2];
1170
1171 PetscFunctionBegin;
1172 PetscCall(DMStagSetUpBuildGlobalOffsets_2d(dm, &globalOffsetsRecomputed)); /* note that we don't actually use all of these. An available optimization is to pass them, when available */
1173 globalOffsets = globalOffsetsRecomputed;
1174 PetscCall(PetscMalloc1(stag->entries, &idxLocal));
1175 PetscCall(PetscMalloc1(stag->entries, &idxGlobal));
1176 for (d = 0; d < 2; ++d) dummyEnd[d] = (PetscBool)(stag->lastRank[d] && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);
1177 entriesPerCorner = stag->dof[0];
1178 entriesPerFace = stag->dof[0] + stag->dof[1];
1179 entriesPerElementRow = stag->n[0] * stag->entriesPerElement + (dummyEnd[0] ? entriesPerFace : 0);
1180 entriesPerElementRowGhost = stag->nGhost[0] * stag->entriesPerElement;
1181 count = 0;
1182 for (d = 0; d < 2; ++d) ghostOffsetStart[d] = stag->start[d] - stag->startGhost[d];
1183 {
1184 const PetscInt neighbor = 4;
1185 const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
1186 for (j = 0; j < stag->n[1]; ++j) {
1187 const PetscInt jghost = j + ghostOffsetStart[1];
1188 for (i = 0; i < stag->n[0]; ++i) {
1189 const PetscInt ighost = i + ghostOffsetStart[0];
1190 for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
1191 idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * stag->entriesPerElement + d;
1192 idxLocal[count] = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
1193 }
1194 }
1195 if (dummyEnd[0]) {
1196 const PetscInt ighost = i + ghostOffsetStart[0];
1197 i = stag->n[0];
1198 for (d = 0; d < stag->dof[0]; ++d, ++count) { /* vertex first */
1199 idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * stag->entriesPerElement + d;
1200 idxLocal[count] = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
1201 }
1202 for (d = 0; d < stag->dof[1]; ++d, ++count) { /* then left edge (skipping bottom face) */
1203 idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * stag->entriesPerElement + stag->dof[0] + d;
1204 idxLocal[count] = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + stag->dof[0] + stag->dof[1] + d;
1205 }
1206 }
1207 }
1208 if (dummyEnd[1]) {
1209 const PetscInt jghost = j + ghostOffsetStart[1];
1210 j = stag->n[1];
1211 for (i = 0; i < stag->n[0]; ++i) {
1212 const PetscInt ighost = i + ghostOffsetStart[0];
1213 for (d = 0; d < entriesPerFace; ++d, ++count) { /* vertex and bottom face (which are the first entries) */
1214 idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * entriesPerFace + d; /* note i increment by entriesPerFace */
1215 idxLocal[count] = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
1216 }
1217 }
1218 if (dummyEnd[0]) {
1219 const PetscInt ighost = i + ghostOffsetStart[0];
1220 i = stag->n[0];
1221 for (d = 0; d < entriesPerCorner; ++d, ++count) { /* vertex only */
1222 idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * entriesPerFace + d; /* note i increment by entriesPerFace */
1223 idxLocal[count] = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
1224 }
1225 }
1226 }
1227 }
1228 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)dm), stag->entries, idxLocal, PETSC_OWN_POINTER, &isLocal));
1229 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)dm), stag->entries, idxGlobal, PETSC_OWN_POINTER, &isGlobal));
1230 {
1231 Vec local, global;
1232 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)dm), 1, stag->entries, PETSC_DECIDE, NULL, &global));
1233 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, PetscMax(stag->entriesPerElement, 1), stag->entriesGhost, NULL, &local));
1234 PetscCall(VecScatterCreate(local, isLocal, global, isGlobal, &stag->ltog_injective));
1235 PetscCall(VecDestroy(&global));
1236 PetscCall(VecDestroy(&local));
1237 }
1238 PetscCall(ISDestroy(&isLocal));
1239 PetscCall(ISDestroy(&isGlobal));
1240 if (globalOffsetsRecomputed) PetscCall(PetscFree(globalOffsetsRecomputed));
1241 PetscFunctionReturn(PETSC_SUCCESS);
1242 }
1243
DMStagPopulateLocalToLocal2d_Internal(DM dm)1244 PETSC_INTERN PetscErrorCode DMStagPopulateLocalToLocal2d_Internal(DM dm)
1245 {
1246 DM_Stag *const stag = (DM_Stag *)dm->data;
1247 PetscInt *idxRemap;
1248 PetscBool dummyEnd[2];
1249 PetscInt i, j, d, count, leftGhostElements, downGhostElements, entriesPerRowGhost, iOffset, jOffset;
1250 PetscInt dOffset[4] = {0};
1251
1252 PetscFunctionBegin;
1253 PetscCall(VecScatterCopy(stag->gtol, &stag->ltol));
1254 PetscCall(PetscMalloc1(stag->entries, &idxRemap));
1255
1256 for (d = 0; d < 2; ++d) dummyEnd[d] = (PetscBool)(stag->lastRank[d] && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);
1257 leftGhostElements = stag->start[0] - stag->startGhost[0];
1258 downGhostElements = stag->start[1] - stag->startGhost[1];
1259 entriesPerRowGhost = stag->nGhost[0] * stag->entriesPerElement;
1260 dOffset[1] = dOffset[0] + stag->dof[0];
1261 dOffset[2] = dOffset[1] + stag->dof[1];
1262 dOffset[3] = dOffset[2] + stag->dof[1];
1263
1264 count = 0;
1265 for (j = 0; j < stag->n[1]; ++j) {
1266 jOffset = entriesPerRowGhost * (downGhostElements + j);
1267 for (i = 0; i < stag->n[0]; ++i) {
1268 iOffset = stag->entriesPerElement * (leftGhostElements + i);
1269 // all
1270 for (d = 0; d < stag->entriesPerElement; ++d) idxRemap[count++] = jOffset + iOffset + d;
1271 }
1272 if (dummyEnd[0]) {
1273 iOffset = stag->entriesPerElement * (leftGhostElements + stag->n[0]);
1274 // down left, left
1275 for (d = 0; d < stag->dof[0]; ++d) idxRemap[count++] = jOffset + iOffset + dOffset[0] + d;
1276 for (d = 0; d < stag->dof[1]; ++d) idxRemap[count++] = jOffset + iOffset + dOffset[2] + d;
1277 }
1278 }
1279 if (dummyEnd[1]) {
1280 jOffset = entriesPerRowGhost * (downGhostElements + stag->n[1]);
1281 for (i = 0; i < stag->n[0]; ++i) {
1282 iOffset = stag->entriesPerElement * (leftGhostElements + i);
1283 // down left, down
1284 for (d = 0; d < stag->dof[0]; ++d) idxRemap[count++] = jOffset + iOffset + dOffset[0] + d;
1285 for (d = 0; d < stag->dof[1]; ++d) idxRemap[count++] = jOffset + iOffset + dOffset[1] + d;
1286 }
1287 if (dummyEnd[0]) {
1288 iOffset = stag->entriesPerElement * (leftGhostElements + stag->n[0]);
1289 // down left
1290 for (d = 0; d < stag->dof[0]; ++d) idxRemap[count++] = jOffset + iOffset + dOffset[0] + d;
1291 }
1292 }
1293
1294 PetscCheck(count == stag->entries, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of entries computed in ltol (%" PetscInt_FMT ") is not as expected (%" PetscInt_FMT ")", count, stag->entries);
1295
1296 PetscCall(VecScatterRemap(stag->ltol, idxRemap, NULL));
1297 PetscCall(PetscFree(idxRemap));
1298 PetscFunctionReturn(PETSC_SUCCESS);
1299 }
1300
DMCreateMatrix_Stag_2D_AIJ_Assemble(DM dm,Mat A)1301 PETSC_INTERN PetscErrorCode DMCreateMatrix_Stag_2D_AIJ_Assemble(DM dm, Mat A)
1302 {
1303 PetscInt entries, dof[DMSTAG_MAX_STRATA], epe, stencil_width, N[2], start[2], n[2], n_extra[2];
1304 DMStagStencilType stencil_type;
1305 DMBoundaryType boundary_type[2];
1306
1307 PetscFunctionBegin;
1308 PetscCall(DMStagGetDOF(dm, &dof[0], &dof[1], &dof[2], NULL));
1309 PetscCall(DMStagGetStencilType(dm, &stencil_type));
1310 PetscCall(DMStagGetStencilWidth(dm, &stencil_width));
1311 PetscCall(DMStagGetEntries(dm, &entries));
1312 PetscCall(DMStagGetEntriesPerElement(dm, &epe));
1313 PetscCall(DMStagGetCorners(dm, &start[0], &start[1], NULL, &n[0], &n[1], NULL, &n_extra[0], &n_extra[1], NULL));
1314 PetscCall(DMStagGetGlobalSizes(dm, &N[0], &N[1], NULL));
1315 PetscCall(DMStagGetBoundaryTypes(dm, &boundary_type[0], &boundary_type[1], NULL));
1316
1317 if (stencil_type == DMSTAG_STENCIL_NONE) {
1318 /* Couple all DOF at each location to each other */
1319 DMStagStencil *row_vertex, *row_face_down, *row_face_left, *row_element;
1320
1321 PetscCall(PetscMalloc1(dof[0], &row_vertex));
1322 for (PetscInt c = 0; c < dof[0]; ++c) {
1323 row_vertex[c].loc = DMSTAG_DOWN_LEFT;
1324 row_vertex[c].c = c;
1325 }
1326
1327 PetscCall(PetscMalloc1(dof[1], &row_face_down));
1328 for (PetscInt c = 0; c < dof[1]; ++c) {
1329 row_face_down[c].loc = DMSTAG_DOWN;
1330 row_face_down[c].c = c;
1331 }
1332
1333 PetscCall(PetscMalloc1(dof[1], &row_face_left));
1334 for (PetscInt c = 0; c < dof[1]; ++c) {
1335 row_face_left[c].loc = DMSTAG_LEFT;
1336 row_face_left[c].c = c;
1337 }
1338
1339 PetscCall(PetscMalloc1(dof[2], &row_element));
1340 for (PetscInt c = 0; c < dof[2]; ++c) {
1341 row_element[c].loc = DMSTAG_ELEMENT;
1342 row_element[c].c = c;
1343 }
1344
1345 for (PetscInt ey = start[1]; ey < start[1] + n[1] + n_extra[1]; ++ey) {
1346 for (PetscInt ex = start[0]; ex < start[0] + n[0] + n_extra[0]; ++ex) {
1347 {
1348 for (PetscInt c = 0; c < dof[0]; ++c) {
1349 row_vertex[c].i = ex;
1350 row_vertex[c].j = ey;
1351 }
1352 PetscCall(DMStagMatSetValuesStencil(dm, A, dof[0], row_vertex, dof[0], row_vertex, NULL, INSERT_VALUES));
1353 }
1354 if (ex < N[0]) {
1355 for (PetscInt c = 0; c < dof[1]; ++c) {
1356 row_face_down[c].i = ex;
1357 row_face_down[c].j = ey;
1358 }
1359 PetscCall(DMStagMatSetValuesStencil(dm, A, dof[1], row_face_down, dof[1], row_face_down, NULL, INSERT_VALUES));
1360 }
1361 if (ey < N[1]) {
1362 for (PetscInt c = 0; c < dof[1]; ++c) {
1363 row_face_left[c].i = ex;
1364 row_face_left[c].j = ey;
1365 }
1366 PetscCall(DMStagMatSetValuesStencil(dm, A, dof[1], row_face_left, dof[1], row_face_left, NULL, INSERT_VALUES));
1367 }
1368 if (ex < N[0] && ey < N[1]) {
1369 for (PetscInt c = 0; c < dof[2]; ++c) {
1370 row_element[c].i = ex;
1371 row_element[c].j = ey;
1372 }
1373 PetscCall(DMStagMatSetValuesStencil(dm, A, dof[2], row_element, dof[2], row_element, NULL, INSERT_VALUES));
1374 }
1375 }
1376 }
1377 PetscCall(PetscFree(row_vertex));
1378 PetscCall(PetscFree(row_face_left));
1379 PetscCall(PetscFree(row_face_down));
1380 PetscCall(PetscFree(row_element));
1381 } else if (stencil_type == DMSTAG_STENCIL_STAR || stencil_type == DMSTAG_STENCIL_BOX) {
1382 DMStagStencil *col, *row;
1383
1384 PetscCall(PetscMalloc1(epe, &row));
1385 {
1386 PetscInt nrows = 0;
1387
1388 for (PetscInt c = 0; c < dof[0]; ++c) {
1389 row[nrows].c = c;
1390 row[nrows].loc = DMSTAG_DOWN_LEFT;
1391 ++nrows;
1392 }
1393 for (PetscInt c = 0; c < dof[1]; ++c) {
1394 row[nrows].c = c;
1395 row[nrows].loc = DMSTAG_LEFT;
1396 ++nrows;
1397 }
1398 for (PetscInt c = 0; c < dof[1]; ++c) {
1399 row[nrows].c = c;
1400 row[nrows].loc = DMSTAG_DOWN;
1401 ++nrows;
1402 }
1403 for (PetscInt c = 0; c < dof[2]; ++c) {
1404 row[nrows].c = c;
1405 row[nrows].loc = DMSTAG_ELEMENT;
1406 ++nrows;
1407 }
1408 }
1409
1410 PetscCall(PetscMalloc1(epe, &col));
1411 {
1412 PetscInt ncols = 0;
1413
1414 for (PetscInt c = 0; c < dof[0]; ++c) {
1415 col[ncols].c = c;
1416 col[ncols].loc = DMSTAG_DOWN_LEFT;
1417 ++ncols;
1418 }
1419 for (PetscInt c = 0; c < dof[1]; ++c) {
1420 col[ncols].c = c;
1421 col[ncols].loc = DMSTAG_LEFT;
1422 ++ncols;
1423 }
1424 for (PetscInt c = 0; c < dof[1]; ++c) {
1425 col[ncols].c = c;
1426 col[ncols].loc = DMSTAG_DOWN;
1427 ++ncols;
1428 }
1429 for (PetscInt c = 0; c < dof[2]; ++c) {
1430 col[ncols].c = c;
1431 col[ncols].loc = DMSTAG_ELEMENT;
1432 ++ncols;
1433 }
1434 }
1435
1436 for (PetscInt ey = start[1]; ey < start[1] + n[1] + n_extra[1]; ++ey) {
1437 for (PetscInt ex = start[0]; ex < start[0] + n[0] + n_extra[0]; ++ex) {
1438 for (PetscInt i = 0; i < epe; ++i) {
1439 row[i].i = ex;
1440 row[i].j = ey;
1441 }
1442 for (PetscInt offset_y = -stencil_width; offset_y <= stencil_width; ++offset_y) {
1443 const PetscInt ey_offset = ey + offset_y;
1444 for (PetscInt offset_x = -stencil_width; offset_x <= stencil_width; ++offset_x) {
1445 const PetscInt ex_offset = ex + offset_x;
1446 /* Only set values corresponding to elements which can have non-dummy entries,
1447 meaning those that map to unknowns in the global representation. In the periodic
1448 case, this is the entire stencil, but in all other cases, only includes a single
1449 "extra" element which is partially outside the physical domain (those points in the
1450 global representation */
1451 if ((stencil_type == DMSTAG_STENCIL_BOX || offset_x == 0 || offset_y == 0) && (boundary_type[0] == DM_BOUNDARY_PERIODIC || (ex_offset < N[0] + 1 && ex_offset >= 0)) && (boundary_type[1] == DM_BOUNDARY_PERIODIC || (ey_offset < N[1] + 1 && ey_offset >= 0))) {
1452 for (PetscInt i = 0; i < epe; ++i) {
1453 col[i].i = ex_offset;
1454 col[i].j = ey_offset;
1455 }
1456 PetscCall(DMStagMatSetValuesStencil(dm, A, epe, row, epe, col, NULL, INSERT_VALUES));
1457 }
1458 }
1459 }
1460 }
1461 }
1462 PetscCall(PetscFree(row));
1463 PetscCall(PetscFree(col));
1464 } else SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Unsupported stencil type %s", DMStagStencilTypes[stencil_type]);
1465 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1466 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1467 PetscFunctionReturn(PETSC_SUCCESS);
1468 }
1469