xref: /petsc/src/dm/impls/plex/plexsfc.c (revision ccfb0f9f40a0131988d7995ed9679700dae2a75a)
1 #include <petsc/private/dmpleximpl.h> /*I   "petscdmplex.h"   I*/
2 #include <petscsf.h>
3 #include <petsc/private/hashset.h>
4 
5 typedef uint64_t ZCode;
6 
7 PETSC_HASH_SET(ZSet, ZCode, PetscHash_UInt64, PetscHashEqual)
8 
9 typedef struct {
10   PetscInt i, j, k;
11 } Ijk;
12 
13 typedef struct {
14   Ijk         eextent;
15   Ijk         vextent;
16   PetscMPIInt comm_size;
17   ZCode      *zstarts;
18 } ZLayout;
19 
20 // ***** Overview of ZCode *******
21 // The SFC uses integer indexing for each dimension and encodes them into a single integer by interleaving the bits of each index.
22 // This is known as Morton encoding, and is referred to as ZCode in this code.
23 // So for index i having bits [i2,i1,i0], and similar for indexes j and k, the ZCode (Morton number) would be:
24 //    [k2,j2,i2,k1,j1,i1,k0,j0,i0]
25 // This encoding allows for easier traversal of the SFC structure (see https://en.wikipedia.org/wiki/Z-order_curve and `ZStepOct()`).
26 // `ZEncode()` is used to go from indices to ZCode, while `ZCodeSplit()` goes from ZCode back to indices.
27 
28 // Decodes the leading interleaved index from a ZCode
29 // e.g. [k2,j2,i2,k1,j1,i1,k0,j0,i0] -> [i2,i1,i0]
30 // Magic numbers taken from https://stackoverflow.com/a/18528775/7564988 (translated to octal)
31 static unsigned ZCodeSplit1(ZCode z)
32 {
33   z &= 0111111111111111111111;
34   z = (z | z >> 2) & 0103030303030303030303;
35   z = (z | z >> 4) & 0100170017001700170017;
36   z = (z | z >> 8) & 0370000037700000377;
37   z = (z | z >> 16) & 0370000000000177777;
38   z = (z | z >> 32) & 07777777;
39   return (unsigned)z;
40 }
41 
42 // Encodes the leading interleaved index from a ZCode
43 // e.g. [i2,i1,i0] -> [0,0,i2,0,0,i1,0,0,i0]
44 static ZCode ZEncode1(unsigned t)
45 {
46   ZCode z = t;
47   z &= 07777777;
48   z = (z | z << 32) & 0370000000000177777;
49   z = (z | z << 16) & 0370000037700000377;
50   z = (z | z << 8) & 0100170017001700170017;
51   z = (z | z << 4) & 0103030303030303030303;
52   z = (z | z << 2) & 0111111111111111111111;
53   return z;
54 }
55 
56 // Decodes i j k indices from a ZCode.
57 // Uses `ZCodeSplit1()` by shifting ZCode so that the leading index is the desired one to decode
58 static Ijk ZCodeSplit(ZCode z)
59 {
60   Ijk c;
61   c.i = ZCodeSplit1(z >> 2);
62   c.j = ZCodeSplit1(z >> 1);
63   c.k = ZCodeSplit1(z >> 0);
64   return c;
65 }
66 
67 // Encodes i j k indices to a ZCode.
68 // Uses `ZCodeEncode1()` by shifting resulting ZCode to the appropriate bit placement
69 static ZCode ZEncode(Ijk c)
70 {
71   ZCode z = (ZEncode1((unsigned int)c.i) << 2) | (ZEncode1((unsigned int)c.j) << 1) | ZEncode1((unsigned int)c.k);
72   return z;
73 }
74 
75 static PetscBool IjkActive(Ijk extent, Ijk l)
76 {
77   if (l.i < extent.i && l.j < extent.j && l.k < extent.k) return PETSC_TRUE;
78   return PETSC_FALSE;
79 }
80 
81 // If z is not the base of an octet (last 3 bits 0), return 0.
82 //
83 // If z is the base of an octet, we recursively grow to the biggest structured octet. This is typically useful when a z
84 // is outside the domain and we wish to skip a (possibly recursively large) octet to find our next interesting point.
85 static ZCode ZStepOct(ZCode z)
86 {
87   if (PetscUnlikely(z == 0)) return 0; // Infinite loop below if z == 0
88   ZCode step = 07;
89   for (; (z & step) == 0; step = (step << 3) | 07) { }
90   return step >> 3;
91 }
92 
93 // Since element/vertex box extents are typically not equal powers of 2, Z codes that lie within the domain are not contiguous.
94 static PetscErrorCode ZLayoutCreate(PetscMPIInt size, const PetscInt eextent[3], const PetscInt vextent[3], ZLayout *layout)
95 {
96   PetscFunctionBegin;
97   layout->eextent.i = eextent[0];
98   layout->eextent.j = eextent[1];
99   layout->eextent.k = eextent[2];
100   layout->vextent.i = vextent[0];
101   layout->vextent.j = vextent[1];
102   layout->vextent.k = vextent[2];
103   layout->comm_size = size;
104   layout->zstarts   = NULL;
105   PetscCall(PetscMalloc1(size + 1, &layout->zstarts));
106 
107   PetscInt total_elems = eextent[0] * eextent[1] * eextent[2];
108   ZCode    z           = 0;
109   layout->zstarts[0]   = 0;
110   // This loop traverses all vertices in the global domain, so is worth making fast. We use ZStepBound
111   for (PetscMPIInt r = 0; r < size; r++) {
112     PetscInt elems_needed = (total_elems / size) + (total_elems % size > r), count;
113     for (count = 0; count < elems_needed; z++) {
114       ZCode skip = ZStepOct(z); // optimistically attempt a longer step
115       for (ZCode s = skip;; s >>= 3) {
116         Ijk trial = ZCodeSplit(z + s);
117         if (IjkActive(layout->eextent, trial)) {
118           while (count + s + 1 > (ZCode)elems_needed) s >>= 3; // Shrink the octet
119           count += s + 1;
120           z += s;
121           break;
122         }
123         if (s == 0) { // the whole skip octet is inactive
124           z += skip;
125           break;
126         }
127       }
128     }
129     // Pick up any extra vertices in the Z ordering before the next rank's first owned element.
130     //
131     // This leads to poorly balanced vertices when eextent is a power of 2, since all the fringe vertices end up
132     // on the last rank. A possible solution is to balance the Z-order vertices independently from the cells, which will
133     // result in a lot of element closures being remote. We could finish marking boundary conditions, then do a round of
134     // vertex ownership smoothing (which would reorder and redistribute vertices without touching element distribution).
135     // Another would be to have an analytic ownership criteria for vertices in the fringe veextent - eextent. This would
136     // complicate the job of identifying an owner and its offset.
137     //
138     // The current recommended approach is to let `-dm_distribute 1` (default) resolve vertex ownership. This is
139     // *mandatory* with isoperiodicity (except in special cases) to remove standed vertices from local spaces. Here's
140     // the issue:
141     //
142     // Consider this partition on rank 0 (left) and rank 1.
143     //
144     //    4 --------  5 -- 14 --10 -- 21 --11
145     //                |          |          |
146     // 7 -- 16 --  8  |          |          |
147     // |           |  3 -------  7 -------  9
148     // |           |             |          |
149     // 4 --------  6 ------ 10   |          |
150     // |           |         |   6 -- 16 -- 8
151     // |           |         |
152     // 3 ---11---  5 --18--  9
153     //
154     // The periodic face SF looks like
155     // [0] Number of roots=21, leaves=1, remote ranks=1
156     // [0] 16 <- (0,11)
157     // [1] Number of roots=22, leaves=2, remote ranks=2
158     // [1] 14 <- (0,18)
159     // [1] 21 <- (1,16)
160     //
161     // In handling face (0,16), rank 0 learns that (0,7) and (0,8) map to (0,3) and (0,5) respectively, thus we won't use
162     // the point SF links to (1,4) and (1,5). Rank 1 learns about the periodic mapping of (1,5) while handling face
163     // (1,14), but never learns that vertex (1,4) has been mapped to (0,3) by face (0,16).
164     //
165     // We can relatively easily inform vertex (1,4) of this mapping, but it stays in rank 1's local space despite not
166     // being in the closure and thus not being contributed to. This would be mostly harmless except that some viewer
167     // routines expect all local points to be somehow significant. It is not easy to analytically remove the (1,4)
168     // vertex because the point SF and isoperiodic face SF would need to be updated to account for removal of the
169     // stranded vertices.
170     for (; z <= ZEncode(layout->vextent); z++) {
171       Ijk loc = ZCodeSplit(z);
172       if (IjkActive(layout->eextent, loc)) break;
173       z += ZStepOct(z);
174     }
175     layout->zstarts[r + 1] = z;
176   }
177   layout->zstarts[size] = ZEncode(layout->vextent);
178   PetscFunctionReturn(PETSC_SUCCESS);
179 }
180 
181 static PetscInt ZLayoutElementsOnRank(const ZLayout *layout, PetscMPIInt rank)
182 {
183   PetscInt remote_elem = 0;
184   for (ZCode rz = layout->zstarts[rank]; rz < layout->zstarts[rank + 1]; rz++) {
185     Ijk loc = ZCodeSplit(rz);
186     if (IjkActive(layout->eextent, loc)) remote_elem++;
187     else rz += ZStepOct(rz);
188   }
189   return remote_elem;
190 }
191 
192 static PetscInt ZCodeFind(ZCode key, PetscInt n, const ZCode X[])
193 {
194   PetscInt lo = 0, hi = n;
195 
196   if (n == 0) return -1;
197   while (hi - lo > 1) {
198     PetscInt mid = lo + (hi - lo) / 2;
199     if (key < X[mid]) hi = mid;
200     else lo = mid;
201   }
202   return key == X[lo] ? lo : -(lo + (key > X[lo]) + 1);
203 }
204 
205 static inline PetscBool IsPointInsideStratum(PetscInt point, PetscInt pStart, PetscInt pEnd)
206 {
207   return (point >= pStart && point < pEnd) ? PETSC_TRUE : PETSC_FALSE;
208 }
209 
210 static PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(DM dm, const ZLayout *layout, const ZCode *vert_z, PetscSegBuffer per_faces[3], const PetscReal *lower, const PetscReal *upper, const DMBoundaryType *periodicity, PetscSegBuffer donor_face_closure[3], PetscSegBuffer my_donor_faces[3])
211 {
212   MPI_Comm    comm;
213   PetscInt    dim, vStart, vEnd;
214   PetscMPIInt size;
215   PetscSF     face_sfs[3];
216   PetscScalar transforms[3][4][4] = {{{0}}};
217 
218   PetscFunctionBegin;
219   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
220   PetscCallMPI(MPI_Comm_size(comm, &size));
221   PetscCall(DMGetDimension(dm, &dim));
222   const PetscInt csize = PetscPowInt(2, dim - 1);
223   PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
224 
225   PetscInt num_directions = 0;
226   for (PetscInt direction = 0; direction < dim; direction++) {
227     PetscCount   num_faces;
228     PetscInt    *faces;
229     ZCode       *donor_verts, *donor_minz;
230     PetscSFNode *leaf;
231     PetscCount   num_multiroots = 0;
232     PetscInt     pStart, pEnd;
233     PetscBool    sorted;
234     PetscInt     inum_faces;
235 
236     if (periodicity[direction] != DM_BOUNDARY_PERIODIC) continue;
237     PetscCall(PetscSegBufferGetSize(per_faces[direction], &num_faces));
238     PetscCall(PetscSegBufferExtractInPlace(per_faces[direction], &faces));
239     PetscCall(PetscSegBufferExtractInPlace(donor_face_closure[direction], &donor_verts));
240     PetscCall(PetscMalloc1(num_faces, &donor_minz));
241     PetscCall(PetscMalloc1(num_faces, &leaf));
242     for (PetscCount i = 0; i < num_faces; i++) {
243       ZCode minz = donor_verts[i * csize];
244 
245       for (PetscInt j = 1; j < csize; j++) minz = PetscMin(minz, donor_verts[i * csize + j]);
246       donor_minz[i] = minz;
247     }
248     PetscCall(PetscIntCast(num_faces, &inum_faces));
249     PetscCall(PetscSortedInt64(inum_faces, (const PetscInt64 *)donor_minz, &sorted));
250     // If a donor vertex were chosen to broker multiple faces, we would have a logic error.
251     // Checking for sorting is a cheap check that there are no duplicates.
252     PetscCheck(sorted, PETSC_COMM_SELF, PETSC_ERR_PLIB, "minz not sorted; possible duplicates not checked");
253     for (PetscCount i = 0; i < num_faces;) {
254       ZCode       z = donor_minz[i];
255       PetscMPIInt remote_rank, remote_count = 0;
256 
257       PetscCall(PetscMPIIntCast(ZCodeFind(z, size + 1, layout->zstarts), &remote_rank));
258       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
259       // Process all the vertices on this rank
260       for (ZCode rz = layout->zstarts[remote_rank]; rz < layout->zstarts[remote_rank + 1]; rz++) {
261         Ijk loc = ZCodeSplit(rz);
262 
263         if (rz == z) {
264           leaf[i].rank  = remote_rank;
265           leaf[i].index = remote_count;
266           i++;
267           if (i == num_faces) break;
268           z = donor_minz[i];
269         }
270         if (IjkActive(layout->vextent, loc)) remote_count++;
271       }
272     }
273     PetscCall(PetscFree(donor_minz));
274     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &face_sfs[num_directions]));
275     PetscCall(PetscSFSetGraph(face_sfs[num_directions], vEnd - vStart, inum_faces, NULL, PETSC_USE_POINTER, leaf, PETSC_USE_POINTER));
276     const PetscInt *my_donor_degree;
277     PetscCall(PetscSFComputeDegreeBegin(face_sfs[num_directions], &my_donor_degree));
278     PetscCall(PetscSFComputeDegreeEnd(face_sfs[num_directions], &my_donor_degree));
279 
280     for (PetscInt i = 0; i < vEnd - vStart; i++) {
281       num_multiroots += my_donor_degree[i];
282       if (my_donor_degree[i] == 0) continue;
283       PetscAssert(my_donor_degree[i] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex has multiple faces");
284     }
285     PetscInt  *my_donors, *donor_indices, *my_donor_indices;
286     PetscCount num_my_donors;
287 
288     PetscCall(PetscSegBufferGetSize(my_donor_faces[direction], &num_my_donors));
289     PetscCheck(num_my_donors == num_multiroots, PETSC_COMM_SELF, PETSC_ERR_SUP, "Donor request (%" PetscCount_FMT ") does not match expected donors (%" PetscCount_FMT ")", num_my_donors, num_multiroots);
290     PetscCall(PetscSegBufferExtractInPlace(my_donor_faces[direction], &my_donors));
291     PetscCall(PetscMalloc1(vEnd - vStart, &my_donor_indices));
292     for (PetscCount i = 0; i < num_my_donors; i++) {
293       PetscInt f = my_donors[i];
294       PetscInt num_points, *points = NULL, minv = PETSC_INT_MAX;
295 
296       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
297       for (PetscInt j = 0; j < num_points; j++) {
298         PetscInt p = points[2 * j];
299         if (!IsPointInsideStratum(p, vStart, vEnd)) continue;
300         minv = PetscMin(minv, p);
301       }
302       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
303       PetscAssert(my_donor_degree[minv - vStart] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex not requested");
304       my_donor_indices[minv - vStart] = f;
305     }
306     PetscCall(PetscMalloc1(num_faces, &donor_indices));
307     PetscCall(PetscSFBcastBegin(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
308     PetscCall(PetscSFBcastEnd(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
309     PetscCall(PetscFree(my_donor_indices));
310     // Modify our leafs so they point to donor faces instead of donor minz. Additionally, give them indices as faces.
311     for (PetscCount i = 0; i < num_faces; i++) leaf[i].index = donor_indices[i];
312     PetscCall(PetscFree(donor_indices));
313     PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
314     PetscCall(PetscSFSetGraph(face_sfs[num_directions], pEnd - pStart, inum_faces, faces, PETSC_COPY_VALUES, leaf, PETSC_OWN_POINTER));
315     {
316       char face_sf_name[PETSC_MAX_PATH_LEN];
317       PetscCall(PetscSNPrintf(face_sf_name, sizeof face_sf_name, "Z-order Isoperiodic Faces #%" PetscInt_FMT, num_directions));
318       PetscCall(PetscObjectSetName((PetscObject)face_sfs[num_directions], face_sf_name));
319     }
320 
321     transforms[num_directions][0][0]         = 1;
322     transforms[num_directions][1][1]         = 1;
323     transforms[num_directions][2][2]         = 1;
324     transforms[num_directions][3][3]         = 1;
325     transforms[num_directions][direction][3] = upper[direction] - lower[direction];
326     num_directions++;
327   }
328 
329   PetscCall(DMPlexSetIsoperiodicFaceSF(dm, num_directions, face_sfs));
330   PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, num_directions, (PetscScalar *)transforms));
331 
332   for (PetscInt i = 0; i < num_directions; i++) PetscCall(PetscSFDestroy(&face_sfs[i]));
333   PetscFunctionReturn(PETSC_SUCCESS);
334 }
335 
336 // This is a DMGlobalToLocalHook that applies the affine offsets. When extended for rotated periodicity, it'll need to
337 // apply a rotatonal transform and similar operations will be needed for fields (e.g., to rotate a velocity vector).
338 // We use this crude approach here so we don't have to write new GPU kernels yet.
339 static PetscErrorCode DMCoordAddPeriodicOffsets_Private(DM dm, Vec g, InsertMode mode, Vec l, void *ctx)
340 {
341   PetscFunctionBegin;
342   // These `VecScatter`s should be merged to improve efficiency; the scatters cannot be overlapped.
343   for (PetscInt i = 0; i < dm->periodic.num_affines; i++) {
344     PetscCall(VecScatterBegin(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD));
345     PetscCall(VecScatterEnd(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD));
346   }
347   PetscFunctionReturn(PETSC_SUCCESS);
348 }
349 
350 // Modify index array based on the transformation of `point` for the given section and field
351 // Used for correcting the sfNatural based on point reorientation
352 static PetscErrorCode DMPlexOrientFieldPointIndex(DM dm, PetscSection section, PetscInt field, PetscInt array_size, PetscInt array[], PetscInt point, PetscInt orientation)
353 {
354   PetscInt        *copy;
355   PetscInt         dof, off, point_ornt[2] = {point, orientation};
356   const PetscInt **perms;
357 
358   PetscFunctionBeginUser;
359   PetscCall(PetscSectionGetFieldPointSyms(section, field, 1, point_ornt, &perms, NULL));
360   if (!perms) PetscFunctionReturn(PETSC_SUCCESS); // section may not have symmetries, such as Q2 finite elements
361   PetscCall(PetscSectionGetDof(section, point, &dof));
362   PetscCall(PetscSectionGetOffset(section, point, &off));
363   PetscCheck(off + dof <= array_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Section indices exceed index array bounds");
364   PetscCall(DMGetWorkArray(dm, dof, MPIU_INT, &copy));
365   PetscArraycpy(copy, &array[off], dof);
366 
367   for (PetscInt i = 0; i < dof; i++) {
368     if (perms[0]) array[off + perms[0][i]] = copy[i];
369   }
370 
371   PetscCall(PetscSectionRestoreFieldPointSyms(section, field, 1, point_ornt, &perms, NULL));
372   PetscCall(DMRestoreWorkArray(dm, dof, MPIU_INT, &copy));
373   PetscFunctionReturn(PETSC_SUCCESS);
374 }
375 
376 // Modify Vec based on the transformation of `point` for the given section and field
377 static PetscErrorCode DMPlexOrientFieldPointVec(DM dm, PetscSection section, PetscInt field, Vec V, PetscInt point, PetscInt orientation)
378 {
379   PetscScalar        *copy, *V_arr;
380   PetscInt            dof, off, point_ornt[2] = {point, orientation};
381   const PetscInt    **perms;
382   const PetscScalar **rots;
383 
384   PetscFunctionBeginUser;
385   PetscCall(PetscSectionGetFieldPointSyms(section, field, 1, point_ornt, &perms, &rots));
386   if (!perms) PetscFunctionReturn(PETSC_SUCCESS); // section may not have symmetries, such as Q2 finite elements
387   PetscCall(PetscSectionGetDof(section, point, &dof));
388   PetscCall(PetscSectionGetOffset(section, point, &off));
389   PetscCall(VecGetArray(V, &V_arr));
390   PetscCall(DMGetWorkArray(dm, dof, MPIU_SCALAR, &copy));
391   PetscArraycpy(copy, &V_arr[off], dof);
392 
393   for (PetscInt i = 0; i < dof; i++) {
394     if (perms[0]) V_arr[off + perms[0][i]] = copy[i];
395     if (rots[0]) V_arr[off + perms[0][i]] *= rots[0][i];
396   }
397 
398   PetscCall(PetscSectionRestoreFieldPointSyms(section, field, 1, point_ornt, &perms, &rots));
399   PetscCall(DMRestoreWorkArray(dm, dof, MPIU_SCALAR, &copy));
400   PetscCall(VecRestoreArray(V, &V_arr));
401   PetscFunctionReturn(PETSC_SUCCESS);
402 }
403 
404 // Reorient the point in the DMPlex while also applying necessary corrections to other structures (e.g. coordinates)
405 static PetscErrorCode DMPlexOrientPointWithCorrections(DM dm, PetscInt point, PetscInt ornt, PetscSection perm_section, PetscInt perm_length, PetscInt perm[])
406 {
407   // TODO: Potential speed up if we early exit for ornt == 0 (i.e. if ornt is identity, we don't need to do anything)
408   PetscFunctionBeginUser;
409   PetscCall(DMPlexOrientPoint(dm, point, ornt));
410 
411   { // Correct coordinates based on new cone ordering
412     DM           cdm;
413     PetscSection csection;
414     Vec          coordinates;
415     PetscInt     pStart, pEnd;
416 
417     PetscCall(DMGetCoordinatesLocal(dm, &coordinates));
418     PetscCall(DMGetCoordinateDM(dm, &cdm));
419     PetscCall(DMGetLocalSection(cdm, &csection));
420     PetscCall(PetscSectionGetChart(csection, &pStart, &pEnd));
421     if (IsPointInsideStratum(point, pStart, pEnd)) PetscCall(DMPlexOrientFieldPointVec(cdm, csection, 0, coordinates, point, ornt));
422   }
423 
424   if (perm_section) {
425     PetscInt num_fields;
426     PetscCall(PetscSectionGetNumFields(perm_section, &num_fields));
427     for (PetscInt f = 0; f < num_fields; f++) PetscCall(DMPlexOrientFieldPointIndex(dm, perm_section, f, perm_length, perm, point, ornt));
428   }
429   PetscFunctionReturn(PETSC_SUCCESS);
430 }
431 
432 // Creates SF to communicate data from donor to periodic faces. The data can be different sizes per donor-periodic pair and is given in `point_sizes[]`
433 static PetscErrorCode CreateDonorToPeriodicSF(DM dm, PetscSF face_sf, PetscInt pStart, PetscInt pEnd, const PetscInt point_sizes[], PetscInt *rootbuffersize, PetscInt *leafbuffersize, PetscBT *rootbt, PetscSF *sf_closure)
434 {
435   MPI_Comm           comm;
436   PetscMPIInt        rank;
437   PetscInt           nroots, nleaves;
438   PetscInt          *rootdata, *leafdata;
439   const PetscInt    *filocal;
440   const PetscSFNode *firemote;
441 
442   PetscFunctionBeginUser;
443   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
444   PetscCallMPI(MPI_Comm_rank(comm, &rank));
445 
446   PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, &firemote));
447   PetscCall(PetscCalloc2(2 * nroots, &rootdata, 2 * nroots, &leafdata));
448   for (PetscInt i = 0; i < nleaves; i++) {
449     PetscInt point = filocal[i];
450     PetscCheck(IsPointInsideStratum(point, pStart, pEnd), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Point %" PetscInt_FMT " in leaves exists outside of stratum [%" PetscInt_FMT ", %" PetscInt_FMT ")", point, pStart, pEnd);
451     leafdata[point] = point_sizes[point - pStart];
452   }
453   PetscCall(PetscSFReduceBegin(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
454   PetscCall(PetscSFReduceEnd(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
455 
456   PetscInt root_offset = 0;
457   PetscCall(PetscBTCreate(nroots, rootbt));
458   for (PetscInt p = 0; p < nroots; p++) {
459     const PetscInt *donor_dof = rootdata + nroots;
460     if (donor_dof[p] == 0) {
461       rootdata[2 * p]     = -1;
462       rootdata[2 * p + 1] = -1;
463       continue;
464     }
465     PetscCall(PetscBTSet(*rootbt, p));
466     PetscCheck(IsPointInsideStratum(p, pStart, pEnd), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Point %" PetscInt_FMT " in roots exists outside of stratum [%" PetscInt_FMT ", %" PetscInt_FMT ")", p, pStart, pEnd);
467     PetscInt p_size = point_sizes[p - pStart];
468     PetscCheck(donor_dof[p] == p_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Reduced leaf data size (%" PetscInt_FMT ") does not match root data size (%" PetscInt_FMT ")", donor_dof[p], p_size);
469     rootdata[2 * p]     = root_offset;
470     rootdata[2 * p + 1] = p_size;
471     root_offset += p_size;
472   }
473   PetscCall(PetscSFBcastBegin(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
474   PetscCall(PetscSFBcastEnd(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
475   // Count how many leaves we need to communicate the closures
476   PetscInt leaf_offset = 0;
477   for (PetscInt i = 0; i < nleaves; i++) {
478     PetscInt point = filocal[i];
479     if (leafdata[2 * point + 1] < 0) continue;
480     leaf_offset += leafdata[2 * point + 1];
481   }
482 
483   PetscSFNode *closure_leaf;
484   PetscCall(PetscMalloc1(leaf_offset, &closure_leaf));
485   leaf_offset = 0;
486   for (PetscInt i = 0; i < nleaves; i++) {
487     PetscInt point   = filocal[i];
488     PetscInt cl_size = leafdata[2 * point + 1];
489     if (cl_size < 0) continue;
490     for (PetscInt j = 0; j < cl_size; j++) {
491       closure_leaf[leaf_offset].rank  = firemote[i].rank;
492       closure_leaf[leaf_offset].index = leafdata[2 * point] + j;
493       leaf_offset++;
494     }
495   }
496 
497   PetscCall(PetscSFCreate(comm, sf_closure));
498   PetscCall(PetscSFSetGraph(*sf_closure, root_offset, leaf_offset, NULL, PETSC_USE_POINTER, closure_leaf, PETSC_OWN_POINTER));
499   *rootbuffersize = root_offset;
500   *leafbuffersize = leaf_offset;
501   PetscCall(PetscFree2(rootdata, leafdata));
502   PetscFunctionReturn(PETSC_SUCCESS);
503 }
504 
505 // Determine if `key` is in `array`. `array` does NOT need to be sorted
506 static inline PetscBool SearchIntArray(PetscInt key, PetscInt array_size, const PetscInt array[])
507 {
508   for (PetscInt i = 0; i < array_size; i++)
509     if (array[i] == key) return PETSC_TRUE;
510   return PETSC_FALSE;
511 }
512 
513 // Translate a cone in periodic points to the cone in donor points based on the `periodic2donor` array
514 static inline PetscErrorCode TranslateConeP2D(const PetscInt periodic_cone[], PetscInt cone_size, const PetscInt periodic2donor[], PetscInt p2d_count, PetscInt p2d_cone[])
515 {
516   PetscFunctionBeginUser;
517   for (PetscInt p = 0; p < cone_size; p++) {
518     PetscInt p2d_index = -1;
519     for (PetscInt p2d = 0; p2d < p2d_count; p2d++) {
520       if (periodic2donor[p2d * 2] == periodic_cone[p]) p2d_index = p2d;
521     }
522     PetscCheck(p2d_index >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find periodic point in periodic-to-donor array");
523     p2d_cone[p] = periodic2donor[2 * p2d_index + 1];
524   }
525   PetscFunctionReturn(PETSC_SUCCESS);
526 }
527 
528 // Corrects the cone order of periodic faces (and their transitive closure's cones) to match their donor face pair.
529 //
530 // This is done by:
531 // 1. Communicating the donor's vertex coordinates and recursive cones (i.e. its own cone and those of it's constituent edges) to it's periodic pairs
532 //    - The donor vertices have the isoperiodic transform applied to them such that they should match exactly
533 // 2. Translating the periodic vertices into the donor vertices point IDs
534 // 3. Translating the cone of each periodic point into the donor point IDs
535 // 4. Comparing the periodic-to-donor cone to the donor cone for each point
536 // 5. Apply the necessary transformation to the periodic cone to make it match the donor cone
537 static PetscErrorCode DMPlexCorrectOrientationForIsoperiodic(DM dm)
538 {
539   MPI_Comm        comm;
540   DM_Plex        *plex = (DM_Plex *)dm->data;
541   PetscInt        nroots, nleaves;
542   PetscInt       *local_vec_perm = NULL, local_vec_length = 0, *global_vec_perm = NULL, global_vec_length = 0;
543   const PetscInt *filocal, coords_field_id = 0;
544   DM              cdm;
545   PetscSection    csection, localSection = NULL;
546   PetscSF         sfNatural_old = NULL;
547   Vec             coordinates;
548   PetscMPIInt     myrank;
549   PetscBool       debug_printing = PETSC_FALSE;
550 
551   PetscFunctionBeginUser;
552   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
553   PetscCallMPI(MPI_Comm_rank(comm, &myrank));
554   PetscCall(DMGetCoordinatesLocal(dm, &coordinates));
555   PetscCheck(coordinates, comm, PETSC_ERR_ARG_WRONGSTATE, "DM must have coordinates to setup isoperiodic");
556   PetscCall(DMGetCoordinateDM(dm, &cdm));
557   PetscCall(DMGetLocalSection(cdm, &csection));
558 
559   PetscCall(DMGetNaturalSF(dm, &sfNatural_old));
560   // Prep data for naturalSF correction
561   if (plex->periodic.num_face_sfs > 0 && sfNatural_old) {
562     PetscSection       globalSection;
563     PetscSF            pointSF, sectionSF;
564     PetscInt           nleaves;
565     const PetscInt    *ilocal;
566     const PetscSFNode *iremote;
567 
568     // Create global section with just pointSF and including constraints
569     PetscCall(DMGetLocalSection(dm, &localSection));
570     PetscCall(DMGetPointSF(dm, &pointSF));
571     PetscCall(PetscSectionCreateGlobalSection(localSection, pointSF, PETSC_TRUE, PETSC_TRUE, PETSC_FALSE, &globalSection));
572 
573     // Set local_vec_perm to be negative values when that dof is not owned by the current rank
574     // Dofs that are owned are set to their corresponding global Vec index
575     PetscCall(PetscSectionGetStorageSize(globalSection, &global_vec_length));
576     PetscCall(PetscSectionGetStorageSize(localSection, &local_vec_length));
577     PetscCall(PetscMalloc2(global_vec_length, &global_vec_perm, local_vec_length, &local_vec_perm));
578     for (PetscInt i = 0; i < global_vec_length; i++) global_vec_perm[i] = i;
579     for (PetscInt i = 0; i < local_vec_length; i++) local_vec_perm[i] = -(i + 1);
580 
581     PetscCall(PetscSFCreate(comm, &sectionSF));
582     PetscCall(PetscSFSetGraphSection(sectionSF, localSection, globalSection));
583     PetscCall(PetscSFGetGraph(sectionSF, NULL, &nleaves, &ilocal, &iremote));
584     for (PetscInt l = 0; l < nleaves; l++) {
585       if (iremote[l].rank != myrank) continue;
586       PetscInt local_index        = ilocal ? ilocal[l] : l;
587       local_vec_perm[local_index] = global_vec_perm[iremote[l].index];
588     }
589     PetscCall(PetscSectionDestroy(&globalSection));
590     PetscCall(PetscSFDestroy(&sectionSF));
591   }
592 
593   // Create sf_vert_coords and sf_face_cones for communicating donor vertices and cones to periodic faces, respectively
594   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) {
595     PetscSF face_sf                   = plex->periodic.face_sfs[f];
596     const PetscScalar (*transform)[4] = (const PetscScalar (*)[4])plex->periodic.transform[f];
597     PetscInt *face_vertices_size, *face_cones_size;
598     PetscInt  fStart, fEnd, vStart, vEnd, rootnumvert, leafnumvert, rootconesize, leafconesize, dim;
599     PetscSF   sf_vert_coords, sf_face_cones;
600     PetscBT   rootbt;
601 
602     PetscCall(DMGetCoordinateDim(dm, &dim));
603     PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
604     PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
605     PetscCall(PetscCalloc2(fEnd - fStart, &face_vertices_size, fEnd - fStart, &face_cones_size));
606 
607     // Create SFs to communicate donor vertices and donor cones to periodic faces
608     for (PetscInt f = fStart, index = 0; f < fEnd; f++, index++) {
609       PetscInt cl_size, *closure = NULL, num_vertices = 0;
610       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &cl_size, &closure));
611       for (PetscInt p = 0; p < cl_size; p++) {
612         PetscInt cl_point = closure[2 * p];
613         if (IsPointInsideStratum(cl_point, vStart, vEnd)) num_vertices++;
614         else {
615           PetscInt cone_size;
616           PetscCall(DMPlexGetConeSize(dm, cl_point, &cone_size));
617           face_cones_size[index] += cone_size + 2;
618         }
619       }
620       face_vertices_size[index] = num_vertices;
621       face_cones_size[index] += num_vertices;
622       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &cl_size, &closure));
623     }
624     PetscCall(CreateDonorToPeriodicSF(dm, face_sf, fStart, fEnd, face_vertices_size, &rootnumvert, &leafnumvert, &rootbt, &sf_vert_coords));
625     PetscCall(PetscBTDestroy(&rootbt));
626     PetscCall(CreateDonorToPeriodicSF(dm, face_sf, fStart, fEnd, face_cones_size, &rootconesize, &leafconesize, &rootbt, &sf_face_cones));
627 
628     PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, NULL));
629 
630     PetscReal *leaf_donor_coords;
631     PetscInt  *leaf_donor_cones;
632 
633     { // Communicate donor coords and cones to the periodic faces
634       PetscReal         *mydonor_vertices;
635       PetscInt          *mydonor_cones;
636       const PetscScalar *coords_arr;
637 
638       PetscCall(PetscCalloc2(rootnumvert * dim, &mydonor_vertices, rootconesize, &mydonor_cones));
639       PetscCall(VecGetArrayRead(coordinates, &coords_arr));
640       for (PetscInt donor_face = 0, donor_vert_offset = 0, donor_cone_offset = 0; donor_face < nroots; donor_face++) {
641         if (!PetscBTLookup(rootbt, donor_face)) continue;
642         PetscInt cl_size, *closure = NULL;
643 
644         PetscCall(DMPlexGetTransitiveClosure(dm, donor_face, PETSC_TRUE, &cl_size, &closure));
645         // Pack vertex coordinates
646         for (PetscInt p = 0; p < cl_size; p++) {
647           PetscInt cl_point = closure[2 * p], dof, offset;
648           if (!IsPointInsideStratum(cl_point, vStart, vEnd)) continue;
649           mydonor_cones[donor_cone_offset++] = cl_point;
650           PetscCall(PetscSectionGetFieldDof(csection, cl_point, coords_field_id, &dof));
651           PetscCall(PetscSectionGetFieldOffset(csection, cl_point, coords_field_id, &offset));
652           PetscAssert(dof == dim, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Point %" PetscInt_FMT " has dof size %" PetscInt_FMT ", but should match dimension size %" PetscInt_FMT, cl_point, dof, dim);
653           // Apply isoperiodic transform to donor vertices such that corresponding periodic vertices should match exactly
654           for (PetscInt d = 0; d < dof; d++) mydonor_vertices[donor_vert_offset * dim + d] = PetscRealPart(coords_arr[offset + d]) + PetscRealPart(transform[d][3]);
655           donor_vert_offset++;
656         }
657         // Pack cones of face points (including face itself)
658         for (PetscInt p = 0; p < cl_size; p++) {
659           PetscInt        cl_point = closure[2 * p], cone_size, depth;
660           const PetscInt *cone;
661 
662           PetscCall(DMPlexGetConeSize(dm, cl_point, &cone_size));
663           PetscCall(DMPlexGetCone(dm, cl_point, &cone));
664           PetscCall(DMPlexGetPointDepth(dm, cl_point, &depth));
665           if (depth == 0) continue; // don't include vertex depth
666           mydonor_cones[donor_cone_offset++] = cone_size;
667           mydonor_cones[donor_cone_offset++] = cl_point;
668           PetscArraycpy(&mydonor_cones[donor_cone_offset], cone, cone_size);
669           donor_cone_offset += cone_size;
670         }
671         PetscCall(DMPlexRestoreTransitiveClosure(dm, donor_face, PETSC_TRUE, &cl_size, &closure));
672       }
673       PetscCall(VecRestoreArrayRead(coordinates, &coords_arr));
674       PetscCall(PetscBTDestroy(&rootbt));
675 
676       MPI_Datatype vertex_unit;
677       PetscMPIInt  n;
678       PetscCall(PetscMPIIntCast(dim, &n));
679       PetscCallMPI(MPI_Type_contiguous(n, MPIU_REAL, &vertex_unit));
680       PetscCallMPI(MPI_Type_commit(&vertex_unit));
681       PetscCall(PetscMalloc2(leafnumvert * 3, &leaf_donor_coords, leafconesize, &leaf_donor_cones));
682       PetscCall(PetscSFBcastBegin(sf_vert_coords, vertex_unit, mydonor_vertices, leaf_donor_coords, MPI_REPLACE));
683       PetscCall(PetscSFBcastBegin(sf_face_cones, MPIU_INT, mydonor_cones, leaf_donor_cones, MPI_REPLACE));
684       PetscCall(PetscSFBcastEnd(sf_vert_coords, vertex_unit, mydonor_vertices, leaf_donor_coords, MPI_REPLACE));
685       PetscCall(PetscSFBcastEnd(sf_face_cones, MPIU_INT, mydonor_cones, leaf_donor_cones, MPI_REPLACE));
686       PetscCall(PetscSFDestroy(&sf_vert_coords));
687       PetscCall(PetscSFDestroy(&sf_face_cones));
688       PetscCallMPI(MPI_Type_free(&vertex_unit));
689       PetscCall(PetscFree2(mydonor_vertices, mydonor_cones));
690     }
691 
692     { // Determine periodic orientation w/rt donor vertices and reorient
693       PetscReal tol = PetscSqr(PETSC_MACHINE_EPSILON * 1e3);
694       PetscInt *periodic2donor, dm_depth, maxConeSize;
695       PetscInt  coords_offset = 0, cones_offset = 0;
696 
697       PetscCall(DMPlexGetDepth(dm, &dm_depth));
698       PetscCall(DMPlexGetMaxSizes(dm, &maxConeSize, NULL));
699       PetscCall(DMGetWorkArray(dm, 2 * PetscPowInt(maxConeSize, dm_depth - 1), MPIU_INT, &periodic2donor));
700 
701       // Translate the periodic face vertices into the donor vertices
702       // Translation stored in periodic2donor
703       for (PetscInt i = 0; i < nleaves; i++) {
704         PetscInt  periodic_face = filocal[i], cl_size, num_verts = face_vertices_size[periodic_face - fStart];
705         PetscInt  cones_size = face_cones_size[periodic_face - fStart], p2d_count = 0;
706         PetscInt *closure = NULL;
707 
708         PetscCall(DMPlexGetTransitiveClosure(dm, periodic_face, PETSC_TRUE, &cl_size, &closure));
709         for (PetscInt p = 0; p < cl_size; p++) {
710           PetscInt     cl_point = closure[2 * p], coords_size, donor_vertex = -1;
711           PetscScalar *coords = NULL;
712 
713           if (!IsPointInsideStratum(cl_point, vStart, vEnd)) continue;
714           PetscCall(DMPlexVecGetClosure(dm, csection, coordinates, cl_point, &coords_size, &coords));
715           PetscAssert(coords_size == dim, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Point %" PetscInt_FMT " has dof size %" PetscInt_FMT ", but should match dimension size %" PetscInt_FMT, cl_point, coords_size, dim);
716 
717           for (PetscInt v = 0; v < num_verts; v++) {
718             PetscReal dist_sqr = 0;
719             for (PetscInt d = 0; d < coords_size; d++) dist_sqr += PetscSqr(PetscRealPart(coords[d]) - leaf_donor_coords[(v + coords_offset) * dim + d]);
720             if (dist_sqr < tol) {
721               donor_vertex = leaf_donor_cones[cones_offset + v];
722               break;
723             }
724           }
725           PetscCheck(donor_vertex >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Periodic face %" PetscInt_FMT " could not find matching donor vertex for vertex %" PetscInt_FMT, periodic_face, cl_point);
726           if (PetscDefined(USE_DEBUG)) {
727             for (PetscInt c = 0; c < p2d_count; c++) PetscCheck(periodic2donor[2 * c + 1] != donor_vertex, comm, PETSC_ERR_PLIB, "Found repeated cone_point in periodic_ordering");
728           }
729 
730           periodic2donor[2 * p2d_count + 0] = cl_point;
731           periodic2donor[2 * p2d_count + 1] = donor_vertex;
732           p2d_count++;
733           PetscCall(DMPlexVecRestoreClosure(dm, csection, coordinates, cl_point, &coords_size, &coords));
734         }
735         coords_offset += num_verts;
736         PetscCall(DMPlexRestoreTransitiveClosure(dm, periodic_face, PETSC_TRUE, &cl_size, &closure));
737 
738         { // Determine periodic orientation w/rt donor vertices and reorient
739           PetscInt      depth, *p2d_cone, face_is_array[1] = {periodic_face};
740           IS           *is_arrays, face_is;
741           PetscSection *section_arrays;
742           PetscInt     *donor_cone_array = &leaf_donor_cones[cones_offset + num_verts];
743 
744           PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 1, face_is_array, PETSC_USE_POINTER, &face_is));
745           PetscCall(DMPlexGetConeRecursive(dm, face_is, &depth, &is_arrays, &section_arrays));
746           PetscCall(ISDestroy(&face_is));
747           PetscCall(DMGetWorkArray(dm, maxConeSize, MPIU_INT, &p2d_cone));
748           for (PetscInt d = 0; d < depth - 1; d++) {
749             PetscInt        pStart, pEnd;
750             PetscSection    section = section_arrays[d];
751             const PetscInt *periodic_cone_arrays, *periodic_point_arrays;
752 
753             PetscCall(ISGetIndices(is_arrays[d], &periodic_cone_arrays));
754             PetscCall(ISGetIndices(is_arrays[d + 1], &periodic_point_arrays)); // Points at d+1 correspond to the cones at d
755             PetscCall(PetscSectionGetChart(section_arrays[d], &pStart, &pEnd));
756             for (PetscInt p = pStart; p < pEnd; p++) {
757               PetscInt periodic_cone_size, periodic_cone_offset, periodic_point = periodic_point_arrays[p];
758 
759               PetscCall(PetscSectionGetDof(section, p, &periodic_cone_size));
760               PetscCall(PetscSectionGetOffset(section, p, &periodic_cone_offset));
761               const PetscInt *periodic_cone = &periodic_cone_arrays[periodic_cone_offset];
762               PetscCall(TranslateConeP2D(periodic_cone, periodic_cone_size, periodic2donor, p2d_count, p2d_cone));
763 
764               // Find the donor cone that matches the periodic point's cone
765               PetscInt  donor_cone_offset = 0, donor_point = -1, *donor_cone = NULL;
766               PetscBool cone_matches = PETSC_FALSE;
767               while (donor_cone_offset < cones_size - num_verts) {
768                 PetscInt donor_cone_size = donor_cone_array[donor_cone_offset];
769                 donor_point              = donor_cone_array[donor_cone_offset + 1];
770                 donor_cone               = &donor_cone_array[donor_cone_offset + 2];
771 
772                 if (donor_cone_size != periodic_cone_size) goto next_cone;
773                 for (PetscInt c = 0; c < periodic_cone_size; c++) {
774                   cone_matches = SearchIntArray(donor_cone[c], periodic_cone_size, p2d_cone);
775                   if (!cone_matches) goto next_cone;
776                 }
777                 // Save the found donor cone's point to the translation array. These will be used for higher depth points.
778                 // i.e. we save the edge translations for when we look for face cones
779                 periodic2donor[2 * p2d_count + 0] = periodic_point;
780                 periodic2donor[2 * p2d_count + 1] = donor_point;
781                 p2d_count++;
782                 break;
783 
784               next_cone:
785                 donor_cone_offset += donor_cone_size + 2;
786               }
787               PetscCheck(donor_cone_offset < cones_size - num_verts, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find donor cone equivalent to cone of periodic point %" PetscInt_FMT, periodic_point);
788 
789               { // Compare the donor cone with the translated periodic cone and reorient
790                 PetscInt       ornt;
791                 DMPolytopeType cell_type;
792                 PetscBool      found;
793                 PetscCall(DMPlexGetCellType(dm, periodic_point, &cell_type));
794                 PetscCall(DMPolytopeMatchOrientation(cell_type, donor_cone, p2d_cone, &ornt, &found));
795                 PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Could not find transformation between donor (%" PetscInt_FMT ") and periodic (%" PetscInt_FMT ") cone's", periodic_point, donor_point);
796                 if (debug_printing) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Reorienting point %" PetscInt_FMT " by %" PetscInt_FMT "\n", periodic_point, ornt));
797                 PetscCall(DMPlexOrientPointWithCorrections(dm, periodic_point, ornt, localSection, local_vec_length, local_vec_perm));
798               }
799             }
800             PetscCall(ISRestoreIndices(is_arrays[d], &periodic_cone_arrays));
801             PetscCall(ISRestoreIndices(is_arrays[d + 1], &periodic_point_arrays));
802           }
803           PetscCall(DMRestoreWorkArray(dm, maxConeSize, MPIU_INT, &p2d_cone));
804           PetscCall(DMPlexRestoreConeRecursive(dm, face_is, &depth, &is_arrays, &section_arrays));
805         }
806 
807         PetscCall(DMPlexRestoreTransitiveClosure(dm, periodic_face, PETSC_TRUE, &cl_size, &closure));
808         cones_offset += cones_size;
809       }
810       PetscCall(DMRestoreWorkArray(dm, 2 * PetscPowInt(maxConeSize, dm_depth - 1), MPIU_INT, &periodic2donor));
811     }
812     // Re-set local coordinates (i.e. destroy global coordinates if they were modified)
813     PetscCall(DMSetCoordinatesLocal(dm, coordinates));
814 
815     PetscCall(PetscFree2(leaf_donor_coords, leaf_donor_cones));
816     PetscCall(PetscFree2(face_vertices_size, face_cones_size));
817   }
818 
819   if (sfNatural_old) { // Correct SFNatural based on the permutation of the local vector
820     PetscSF      newglob_to_oldglob_sf, sfNatural_old, sfNatural_new;
821     PetscSFNode *remote;
822 
823     { // Translate permutation of local Vec into permutation of global Vec
824       PetscInt g = 0;
825       PetscBT  global_vec_check; // Verify that global indices aren't doubled
826       PetscCall(PetscBTCreate(global_vec_length, &global_vec_check));
827       for (PetscInt l = 0; l < local_vec_length; l++) {
828         PetscInt global_index = local_vec_perm[l];
829         if (global_index < 0) continue;
830         PetscCheck(!PetscBTLookupSet(global_vec_check, global_index), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found duplicate global index %" PetscInt_FMT " in local_vec_perm", global_index);
831         global_vec_perm[g++] = global_index;
832       }
833       PetscCheck(g == global_vec_length, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Wrong number of non-negative local indices");
834       PetscCall(PetscBTDestroy(&global_vec_check));
835     }
836 
837     PetscCall(PetscMalloc1(global_vec_length, &remote));
838     for (PetscInt i = 0; i < global_vec_length; i++) {
839       remote[i].rank  = myrank;
840       remote[i].index = global_vec_perm[i];
841     }
842     PetscCall(PetscFree2(global_vec_perm, local_vec_perm));
843     PetscCall(PetscSFCreate(comm, &newglob_to_oldglob_sf));
844     PetscCall(PetscSFSetGraph(newglob_to_oldglob_sf, global_vec_length, global_vec_length, NULL, PETSC_USE_POINTER, remote, PETSC_OWN_POINTER));
845     PetscCall(DMGetNaturalSF(dm, &sfNatural_old));
846     PetscCall(PetscSFCompose(newglob_to_oldglob_sf, sfNatural_old, &sfNatural_new));
847     PetscCall(DMSetNaturalSF(dm, sfNatural_new));
848     PetscCall(PetscSFDestroy(&sfNatural_new));
849     PetscCall(PetscSFDestroy(&newglob_to_oldglob_sf));
850   }
851   PetscFunctionReturn(PETSC_SUCCESS);
852 }
853 
854 // Start with an SF for a positive depth (e.g., faces) and create a new SF for matched closure.
855 //
856 // Output Arguments:
857 //
858 // + closure_sf - augmented point SF (see `DMGetPointSF()`) that includes the faces and all points in its closure. This
859 //   can be used to create a global section and section SF.
860 // - is_points - array of index sets for just the points in the closure of `face_sf`. These may be used to apply an affine
861 //   transformation to periodic dofs; see DMPeriodicCoordinateSetUp_Internal().
862 //
863 static PetscErrorCode DMPlexCreateIsoperiodicPointSF_Private(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs, PetscSF *closure_sf, IS **is_points)
864 {
865   MPI_Comm           comm;
866   PetscMPIInt        rank;
867   PetscSF            point_sf;
868   PetscInt           nroots, nleaves;
869   const PetscInt    *filocal;
870   const PetscSFNode *firemote;
871 
872   PetscFunctionBegin;
873   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
874   PetscCallMPI(MPI_Comm_rank(comm, &rank));
875   PetscCall(DMGetPointSF(dm, &point_sf)); // Point SF has remote points
876   PetscCall(PetscMalloc1(num_face_sfs, is_points));
877 
878   PetscCall(DMPlexCorrectOrientationForIsoperiodic(dm));
879 
880   for (PetscInt f = 0; f < num_face_sfs; f++) {
881     PetscSF   face_sf = face_sfs[f];
882     PetscInt *cl_sizes;
883     PetscInt  fStart, fEnd, rootbuffersize, leafbuffersize;
884     PetscSF   sf_closure;
885     PetscBT   rootbt;
886 
887     PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
888     PetscCall(PetscMalloc1(fEnd - fStart, &cl_sizes));
889     for (PetscInt f = fStart, index = 0; f < fEnd; f++, index++) {
890       PetscInt cl_size, *closure = NULL;
891       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &cl_size, &closure));
892       cl_sizes[index] = cl_size - 1;
893       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &cl_size, &closure));
894     }
895 
896     PetscCall(CreateDonorToPeriodicSF(dm, face_sf, fStart, fEnd, cl_sizes, &rootbuffersize, &leafbuffersize, &rootbt, &sf_closure));
897     PetscCall(PetscFree(cl_sizes));
898     PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, &firemote));
899 
900     PetscSFNode *leaf_donor_closure;
901     { // Pack root buffer with owner for every point in the root cones
902       PetscSFNode       *donor_closure;
903       const PetscInt    *pilocal;
904       const PetscSFNode *piremote;
905       PetscInt           npoints;
906 
907       PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, &pilocal, &piremote));
908       PetscCall(PetscCalloc1(rootbuffersize, &donor_closure));
909       for (PetscInt p = 0, root_offset = 0; p < nroots; p++) {
910         if (!PetscBTLookup(rootbt, p)) continue;
911         PetscInt cl_size, *closure = NULL;
912         PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
913         for (PetscInt j = 1; j < cl_size; j++) {
914           PetscInt c = closure[2 * j];
915           if (pilocal) {
916             PetscInt found = -1;
917             if (npoints > 0) PetscCall(PetscFindInt(c, npoints, pilocal, &found));
918             if (found >= 0) {
919               donor_closure[root_offset++] = piremote[found];
920               continue;
921             }
922           }
923           // we own c
924           donor_closure[root_offset].rank  = rank;
925           donor_closure[root_offset].index = c;
926           root_offset++;
927         }
928         PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
929       }
930 
931       PetscCall(PetscMalloc1(leafbuffersize, &leaf_donor_closure));
932       PetscCall(PetscSFBcastBegin(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE));
933       PetscCall(PetscSFBcastEnd(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE));
934       PetscCall(PetscSFDestroy(&sf_closure));
935       PetscCall(PetscFree(donor_closure));
936     }
937 
938     PetscSFNode *new_iremote;
939     PetscCall(PetscCalloc1(nroots, &new_iremote));
940     for (PetscInt i = 0; i < nroots; i++) new_iremote[i].rank = -1;
941     // Walk leaves and match vertices
942     for (PetscInt i = 0, leaf_offset = 0; i < nleaves; i++) {
943       PetscInt  point   = filocal[i], cl_size;
944       PetscInt *closure = NULL;
945       PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
946       for (PetscInt j = 1; j < cl_size; j++) {
947         PetscInt    c  = closure[2 * j];
948         PetscSFNode lc = leaf_donor_closure[leaf_offset];
949         // printf("[%d] face %d.%d: %d ?-- (%d,%d)\n", rank, point, j, c, lc.rank, lc.index);
950         if (new_iremote[c].rank == -1) {
951           new_iremote[c] = lc;
952         } else PetscCheck(new_iremote[c].rank == lc.rank && new_iremote[c].index == lc.index, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Mismatched cone ordering between faces");
953         leaf_offset++;
954       }
955       PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
956     }
957     PetscCall(PetscFree(leaf_donor_closure));
958 
959     // Include face points in closure SF
960     for (PetscInt i = 0; i < nleaves; i++) new_iremote[filocal[i]] = firemote[i];
961     // consolidate leaves
962     PetscInt *leafdata;
963     PetscCall(PetscMalloc1(nroots, &leafdata));
964     PetscInt num_new_leaves = 0;
965     for (PetscInt i = 0; i < nroots; i++) {
966       if (new_iremote[i].rank == -1) continue;
967       new_iremote[num_new_leaves] = new_iremote[i];
968       leafdata[num_new_leaves]    = i;
969       num_new_leaves++;
970     }
971     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, num_new_leaves, leafdata, PETSC_COPY_VALUES, &(*is_points)[f]));
972 
973     PetscSF csf;
974     PetscCall(PetscSFCreate(comm, &csf));
975     PetscCall(PetscSFSetGraph(csf, nroots, num_new_leaves, leafdata, PETSC_COPY_VALUES, new_iremote, PETSC_COPY_VALUES));
976     PetscCall(PetscFree(new_iremote)); // copy and delete because new_iremote is longer than it needs to be
977     PetscCall(PetscFree(leafdata));
978     PetscCall(PetscBTDestroy(&rootbt));
979 
980     PetscInt npoints;
981     PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, NULL, NULL));
982     if (npoints < 0) { // empty point_sf
983       *closure_sf = csf;
984     } else {
985       PetscCall(PetscSFMerge(point_sf, csf, closure_sf));
986       PetscCall(PetscSFDestroy(&csf));
987     }
988     if (f > 0) PetscCall(PetscSFDestroy(&point_sf)); // Only destroy if point_sf is from previous calls to PetscSFMerge
989     point_sf = *closure_sf;                          // Use combined point + isoperiodic SF to define point ownership for further face_sf
990   }
991   PetscCall(PetscObjectSetName((PetscObject)*closure_sf, "Composed Periodic Points"));
992   PetscFunctionReturn(PETSC_SUCCESS);
993 }
994 
995 static PetscErrorCode DMGetIsoperiodicPointSF_Plex(DM dm, PetscSF *sf)
996 {
997   DM_Plex *plex = (DM_Plex *)dm->data;
998 
999   PetscFunctionBegin;
1000   if (!plex->periodic.composed_sf) PetscCall(DMPlexCreateIsoperiodicPointSF_Private(dm, plex->periodic.num_face_sfs, plex->periodic.face_sfs, &plex->periodic.composed_sf, &plex->periodic.periodic_points));
1001   if (sf) *sf = plex->periodic.composed_sf;
1002   PetscFunctionReturn(PETSC_SUCCESS);
1003 }
1004 
1005 PetscErrorCode DMPlexMigrateIsoperiodicFaceSF_Internal(DM old_dm, DM dm, PetscSF sf_migration)
1006 {
1007   DM_Plex    *plex = (DM_Plex *)old_dm->data;
1008   PetscSF     sf_point, *new_face_sfs;
1009   PetscMPIInt rank;
1010 
1011   PetscFunctionBegin;
1012   if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
1013   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
1014   PetscCall(DMGetPointSF(dm, &sf_point));
1015   PetscCall(PetscMalloc1(plex->periodic.num_face_sfs, &new_face_sfs));
1016 
1017   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) {
1018     PetscInt           old_npoints, new_npoints, old_nleaf, new_nleaf, point_nleaf;
1019     PetscSFNode       *new_leafdata, *rootdata, *leafdata;
1020     const PetscInt    *old_local, *point_local;
1021     const PetscSFNode *old_remote, *point_remote;
1022 
1023     PetscCall(PetscSFGetGraph(plex->periodic.face_sfs[f], &old_npoints, &old_nleaf, &old_local, &old_remote));
1024     PetscCall(PetscSFGetGraph(sf_migration, NULL, &new_nleaf, NULL, NULL));
1025     PetscCall(PetscSFGetGraph(sf_point, &new_npoints, &point_nleaf, &point_local, &point_remote));
1026     PetscAssert(new_nleaf == new_npoints, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected migration leaf space to match new point root space");
1027     PetscCall(PetscMalloc3(old_npoints, &rootdata, old_npoints, &leafdata, new_npoints, &new_leafdata));
1028 
1029     // Fill new_leafdata with new owners of all points
1030     for (PetscInt i = 0; i < new_npoints; i++) {
1031       new_leafdata[i].rank  = rank;
1032       new_leafdata[i].index = i;
1033     }
1034     for (PetscInt i = 0; i < point_nleaf; i++) {
1035       PetscInt j      = point_local[i];
1036       new_leafdata[j] = point_remote[i];
1037     }
1038     // REPLACE is okay because every leaf agrees about the new owners
1039     PetscCall(PetscSFReduceBegin(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE));
1040     PetscCall(PetscSFReduceEnd(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE));
1041     // rootdata now contains the new owners
1042 
1043     // Send to leaves of old space
1044     for (PetscInt i = 0; i < old_npoints; i++) {
1045       leafdata[i].rank  = -1;
1046       leafdata[i].index = -1;
1047     }
1048     PetscCall(PetscSFBcastBegin(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE));
1049     PetscCall(PetscSFBcastEnd(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE));
1050 
1051     // Send to new leaf space
1052     PetscCall(PetscSFBcastBegin(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE));
1053     PetscCall(PetscSFBcastEnd(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE));
1054 
1055     PetscInt     nface = 0, *new_local;
1056     PetscSFNode *new_remote;
1057     for (PetscInt i = 0; i < new_npoints; i++) nface += (new_leafdata[i].rank >= 0);
1058     PetscCall(PetscMalloc1(nface, &new_local));
1059     PetscCall(PetscMalloc1(nface, &new_remote));
1060     nface = 0;
1061     for (PetscInt i = 0; i < new_npoints; i++) {
1062       if (new_leafdata[i].rank == -1) continue;
1063       new_local[nface]  = i;
1064       new_remote[nface] = new_leafdata[i];
1065       nface++;
1066     }
1067     PetscCall(PetscFree3(rootdata, leafdata, new_leafdata));
1068     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &new_face_sfs[f]));
1069     PetscCall(PetscSFSetGraph(new_face_sfs[f], new_npoints, nface, new_local, PETSC_OWN_POINTER, new_remote, PETSC_OWN_POINTER));
1070     {
1071       char new_face_sf_name[PETSC_MAX_PATH_LEN];
1072       PetscCall(PetscSNPrintf(new_face_sf_name, sizeof new_face_sf_name, "Migrated Isoperiodic Faces #%" PetscInt_FMT, f));
1073       PetscCall(PetscObjectSetName((PetscObject)new_face_sfs[f], new_face_sf_name));
1074     }
1075   }
1076 
1077   PetscCall(DMPlexSetIsoperiodicFaceSF(dm, plex->periodic.num_face_sfs, new_face_sfs));
1078   PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, plex->periodic.num_face_sfs, (PetscScalar *)plex->periodic.transform));
1079   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) PetscCall(PetscSFDestroy(&new_face_sfs[f]));
1080   PetscCall(PetscFree(new_face_sfs));
1081   PetscFunctionReturn(PETSC_SUCCESS);
1082 }
1083 
1084 PetscErrorCode DMPeriodicCoordinateSetUp_Internal(DM dm)
1085 {
1086   DM_Plex   *plex = (DM_Plex *)dm->data;
1087   PetscCount count;
1088   IS         isdof;
1089   PetscInt   dim;
1090 
1091   PetscFunctionBegin;
1092   if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
1093   PetscCheck(plex->periodic.periodic_points, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Isoperiodic PointSF must be created before this function is called");
1094 
1095   PetscCall(DMGetCoordinateDim(dm, &dim));
1096   dm->periodic.num_affines = plex->periodic.num_face_sfs;
1097   PetscCall(PetscFree2(dm->periodic.affine_to_local, dm->periodic.affine));
1098   PetscCall(PetscMalloc2(dm->periodic.num_affines, &dm->periodic.affine_to_local, dm->periodic.num_affines, &dm->periodic.affine));
1099 
1100   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) {
1101     PetscInt        npoints, vsize, isize;
1102     const PetscInt *points;
1103     IS              is = plex->periodic.periodic_points[f];
1104     PetscSegBuffer  seg;
1105     PetscSection    section;
1106     PetscInt       *ind;
1107     Vec             L, P;
1108     VecType         vec_type;
1109     VecScatter      scatter;
1110     PetscScalar    *x;
1111 
1112     PetscCall(DMGetLocalSection(dm, &section));
1113     PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 32, &seg));
1114     PetscCall(ISGetSize(is, &npoints));
1115     PetscCall(ISGetIndices(is, &points));
1116     for (PetscInt i = 0; i < npoints; i++) {
1117       PetscInt point = points[i], off, dof;
1118 
1119       PetscCall(PetscSectionGetOffset(section, point, &off));
1120       PetscCall(PetscSectionGetDof(section, point, &dof));
1121       PetscAssert(dof % dim == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected dof %" PetscInt_FMT " not divisible by dimension %" PetscInt_FMT, dof, dim);
1122       for (PetscInt j = 0; j < dof / dim; j++) {
1123         PetscInt *slot;
1124 
1125         PetscCall(PetscSegBufferGetInts(seg, 1, &slot));
1126         *slot = off / dim + j;
1127       }
1128     }
1129     PetscCall(PetscSegBufferGetSize(seg, &count));
1130     PetscCall(PetscSegBufferExtractAlloc(seg, &ind));
1131     PetscCall(PetscSegBufferDestroy(&seg));
1132     PetscCall(PetscIntCast(count, &isize));
1133     PetscCall(ISCreateBlock(PETSC_COMM_SELF, dim, isize, ind, PETSC_OWN_POINTER, &isdof));
1134 
1135     PetscCall(PetscIntCast(count * dim, &vsize));
1136     PetscCall(DMGetLocalVector(dm, &L));
1137     PetscCall(VecCreate(PETSC_COMM_SELF, &P));
1138     PetscCall(VecSetSizes(P, vsize, vsize));
1139     PetscCall(VecGetType(L, &vec_type));
1140     PetscCall(VecSetType(P, vec_type));
1141     PetscCall(VecScatterCreate(P, NULL, L, isdof, &scatter));
1142     PetscCall(DMRestoreLocalVector(dm, &L));
1143     PetscCall(ISDestroy(&isdof));
1144 
1145     PetscCall(VecGetArrayWrite(P, &x));
1146     for (PetscCount i = 0; i < count; i++) {
1147       for (PetscInt j = 0; j < dim; j++) x[i * dim + j] = plex->periodic.transform[f][j][3];
1148     }
1149     PetscCall(VecRestoreArrayWrite(P, &x));
1150 
1151     dm->periodic.affine_to_local[f] = scatter;
1152     dm->periodic.affine[f]          = P;
1153   }
1154   PetscCall(DMGlobalToLocalHookAdd(dm, NULL, DMCoordAddPeriodicOffsets_Private, NULL));
1155   PetscFunctionReturn(PETSC_SUCCESS);
1156 }
1157 
1158 PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Internal(DM dm, PetscInt dim, const PetscInt faces[], const PetscReal lower[], const PetscReal upper[], const DMBoundaryType periodicity[], PetscBool interpolate)
1159 {
1160   PetscInt  eextent[3] = {1, 1, 1}, vextent[3] = {1, 1, 1};
1161   const Ijk closure_1[] = {
1162     {0, 0, 0},
1163     {1, 0, 0},
1164   };
1165   const Ijk closure_2[] = {
1166     {0, 0, 0},
1167     {1, 0, 0},
1168     {1, 1, 0},
1169     {0, 1, 0},
1170   };
1171   const Ijk closure_3[] = {
1172     {0, 0, 0},
1173     {0, 1, 0},
1174     {1, 1, 0},
1175     {1, 0, 0},
1176     {0, 0, 1},
1177     {1, 0, 1},
1178     {1, 1, 1},
1179     {0, 1, 1},
1180   };
1181   const Ijk *const closure_dim[] = {NULL, closure_1, closure_2, closure_3};
1182   // This must be kept consistent with DMPlexCreateCubeMesh_Internal
1183   const PetscInt        face_marker_1[]   = {1, 2};
1184   const PetscInt        face_marker_2[]   = {4, 2, 1, 3};
1185   const PetscInt        face_marker_3[]   = {6, 5, 3, 4, 1, 2};
1186   const PetscInt *const face_marker_dim[] = {NULL, face_marker_1, face_marker_2, face_marker_3};
1187   // Orient faces so the normal is in the positive axis and the first vertex is the one closest to zero.
1188   // These orientations can be determined by examining cones of a reference quad and hex element.
1189   const PetscInt        face_orient_1[]   = {0, 0};
1190   const PetscInt        face_orient_2[]   = {-1, 0, 0, -1};
1191   const PetscInt        face_orient_3[]   = {-2, 0, -2, 1, -2, 0};
1192   const PetscInt *const face_orient_dim[] = {NULL, face_orient_1, face_orient_2, face_orient_3};
1193 
1194   PetscFunctionBegin;
1195   PetscCall(PetscLogEventBegin(DMPLEX_CreateBoxSFC, dm, 0, 0, 0));
1196   PetscAssertPointer(dm, 1);
1197   PetscValidLogicalCollectiveInt(dm, dim, 2);
1198   PetscCall(DMSetDimension(dm, dim));
1199   PetscMPIInt rank, size;
1200   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size));
1201   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
1202   for (PetscInt i = 0; i < dim; i++) {
1203     eextent[i] = faces[i];
1204     vextent[i] = faces[i] + 1;
1205   }
1206   ZLayout layout;
1207   PetscCall(ZLayoutCreate(size, eextent, vextent, &layout));
1208   PetscZSet vset; // set of all vertices in the closure of the owned elements
1209   PetscCall(PetscZSetCreate(&vset));
1210   PetscInt local_elems = 0;
1211   for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
1212     Ijk loc = ZCodeSplit(z);
1213     if (IjkActive(layout.vextent, loc)) PetscCall(PetscZSetAdd(vset, z));
1214     else {
1215       z += ZStepOct(z);
1216       continue;
1217     }
1218     if (IjkActive(layout.eextent, loc)) {
1219       local_elems++;
1220       // Add all neighboring vertices to set
1221       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
1222         Ijk   inc  = closure_dim[dim][n];
1223         Ijk   nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
1224         ZCode v    = ZEncode(nloc);
1225         PetscCall(PetscZSetAdd(vset, v));
1226       }
1227     }
1228   }
1229   PetscInt local_verts, off = 0;
1230   ZCode   *vert_z;
1231   PetscCall(PetscZSetGetSize(vset, &local_verts));
1232   PetscCall(PetscMalloc1(local_verts, &vert_z));
1233   PetscCall(PetscZSetGetElems(vset, &off, vert_z));
1234   PetscCall(PetscZSetDestroy(&vset));
1235   // ZCode is unsigned for bitwise convenience, but highest bit should never be set, so can interpret as signed
1236   PetscCall(PetscSortInt64(local_verts, (PetscInt64 *)vert_z));
1237 
1238   PetscCall(DMPlexSetChart(dm, 0, local_elems + local_verts));
1239   for (PetscInt e = 0; e < local_elems; e++) PetscCall(DMPlexSetConeSize(dm, e, PetscPowInt(2, dim)));
1240   PetscCall(DMSetUp(dm));
1241   {
1242     PetscInt e = 0;
1243     for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
1244       Ijk loc = ZCodeSplit(z);
1245       if (!IjkActive(layout.eextent, loc)) {
1246         z += ZStepOct(z);
1247         continue;
1248       }
1249       PetscInt cone[8], orient[8] = {0};
1250       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
1251         Ijk      inc  = closure_dim[dim][n];
1252         Ijk      nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
1253         ZCode    v    = ZEncode(nloc);
1254         PetscInt ci   = ZCodeFind(v, local_verts, vert_z);
1255         PetscAssert(ci >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Did not find neighbor vertex in set");
1256         cone[n] = local_elems + ci;
1257       }
1258       PetscCall(DMPlexSetCone(dm, e, cone));
1259       PetscCall(DMPlexSetConeOrientation(dm, e, orient));
1260       e++;
1261     }
1262   }
1263 
1264   PetscCall(DMPlexSymmetrize(dm));
1265   PetscCall(DMPlexStratify(dm));
1266 
1267   { // Create point SF
1268     PetscSF sf;
1269     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &sf));
1270     PetscInt owned_verts = ZCodeFind(layout.zstarts[rank + 1], local_verts, vert_z);
1271     if (owned_verts < 0) owned_verts = -(owned_verts + 1); // We don't care whether the key was found
1272     PetscInt     num_ghosts = local_verts - owned_verts;   // Due to sorting, owned vertices always come first
1273     PetscInt    *local_ghosts;
1274     PetscSFNode *ghosts;
1275     PetscCall(PetscMalloc1(num_ghosts, &local_ghosts));
1276     PetscCall(PetscMalloc1(num_ghosts, &ghosts));
1277     for (PetscInt i = 0; i < num_ghosts;) {
1278       ZCode       z = vert_z[owned_verts + i];
1279       PetscMPIInt remote_rank, remote_count = 0;
1280 
1281       PetscCall(PetscMPIIntCast(ZCodeFind(z, size + 1, layout.zstarts), &remote_rank));
1282       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
1283       // We have a new remote rank; find all the ghost indices (which are contiguous in vert_z)
1284 
1285       // Count the elements on remote_rank
1286       PetscInt remote_elem = ZLayoutElementsOnRank(&layout, remote_rank);
1287 
1288       // Traverse vertices and make ghost links
1289       for (ZCode rz = layout.zstarts[remote_rank]; rz < layout.zstarts[remote_rank + 1]; rz++) {
1290         Ijk loc = ZCodeSplit(rz);
1291         if (rz == z) {
1292           local_ghosts[i] = local_elems + owned_verts + i;
1293           ghosts[i].rank  = remote_rank;
1294           ghosts[i].index = remote_elem + remote_count;
1295           i++;
1296           if (i == num_ghosts) break;
1297           z = vert_z[owned_verts + i];
1298         }
1299         if (IjkActive(layout.vextent, loc)) remote_count++;
1300         else rz += ZStepOct(rz);
1301       }
1302     }
1303     PetscCall(PetscSFSetGraph(sf, local_elems + local_verts, num_ghosts, local_ghosts, PETSC_OWN_POINTER, ghosts, PETSC_OWN_POINTER));
1304     PetscCall(PetscObjectSetName((PetscObject)sf, "SFC Point SF"));
1305     PetscCall(DMSetPointSF(dm, sf));
1306     PetscCall(PetscSFDestroy(&sf));
1307   }
1308   {
1309     Vec          coordinates;
1310     PetscScalar *coords;
1311     PetscSection coord_section;
1312     PetscInt     coord_size;
1313     PetscCall(DMGetCoordinateSection(dm, &coord_section));
1314     PetscCall(PetscSectionSetNumFields(coord_section, 1));
1315     PetscCall(PetscSectionSetFieldComponents(coord_section, 0, dim));
1316     PetscCall(PetscSectionSetChart(coord_section, local_elems, local_elems + local_verts));
1317     for (PetscInt v = 0; v < local_verts; v++) {
1318       PetscInt point = local_elems + v;
1319       PetscCall(PetscSectionSetDof(coord_section, point, dim));
1320       PetscCall(PetscSectionSetFieldDof(coord_section, point, 0, dim));
1321     }
1322     PetscCall(PetscSectionSetUp(coord_section));
1323     PetscCall(PetscSectionGetStorageSize(coord_section, &coord_size));
1324     PetscCall(VecCreate(PETSC_COMM_SELF, &coordinates));
1325     PetscCall(PetscObjectSetName((PetscObject)coordinates, "coordinates"));
1326     PetscCall(VecSetSizes(coordinates, coord_size, PETSC_DETERMINE));
1327     PetscCall(VecSetBlockSize(coordinates, dim));
1328     PetscCall(VecSetType(coordinates, VECSTANDARD));
1329     PetscCall(VecGetArray(coordinates, &coords));
1330     for (PetscInt v = 0; v < local_verts; v++) {
1331       Ijk loc             = ZCodeSplit(vert_z[v]);
1332       coords[v * dim + 0] = lower[0] + loc.i * (upper[0] - lower[0]) / layout.eextent.i;
1333       if (dim > 1) coords[v * dim + 1] = lower[1] + loc.j * (upper[1] - lower[1]) / layout.eextent.j;
1334       if (dim > 2) coords[v * dim + 2] = lower[2] + loc.k * (upper[2] - lower[2]) / layout.eextent.k;
1335     }
1336     PetscCall(VecRestoreArray(coordinates, &coords));
1337     PetscCall(DMSetCoordinatesLocal(dm, coordinates));
1338     PetscCall(VecDestroy(&coordinates));
1339   }
1340   if (interpolate) {
1341     PetscCall(DMPlexInterpolateInPlace_Internal(dm));
1342 
1343     DMLabel label;
1344     PetscCall(DMCreateLabel(dm, "Face Sets"));
1345     PetscCall(DMGetLabel(dm, "Face Sets", &label));
1346     PetscSegBuffer per_faces[3], donor_face_closure[3], my_donor_faces[3];
1347     for (PetscInt i = 0; i < 3; i++) {
1348       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &per_faces[i]));
1349       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &my_donor_faces[i]));
1350       PetscCall(PetscSegBufferCreate(sizeof(ZCode), 64 * PetscPowInt(2, dim), &donor_face_closure[i]));
1351     }
1352     PetscInt fStart, fEnd, vStart, vEnd;
1353     PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
1354     PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
1355     for (PetscInt f = fStart; f < fEnd; f++) {
1356       PetscInt npoints, *points = NULL, num_fverts = 0, fverts[8];
1357       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
1358       PetscInt bc_count[6] = {0};
1359       for (PetscInt i = 0; i < npoints; i++) {
1360         PetscInt p = points[2 * i];
1361         if (!IsPointInsideStratum(p, vStart, vEnd)) continue;
1362         fverts[num_fverts++] = p;
1363         Ijk loc              = ZCodeSplit(vert_z[p - vStart]);
1364         // Convention here matches DMPlexCreateCubeMesh_Internal
1365         bc_count[0] += loc.i == 0;
1366         bc_count[1] += loc.i == layout.vextent.i - 1;
1367         bc_count[2] += loc.j == 0;
1368         bc_count[3] += loc.j == layout.vextent.j - 1;
1369         bc_count[4] += loc.k == 0;
1370         bc_count[5] += loc.k == layout.vextent.k - 1;
1371       }
1372       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
1373       for (PetscInt bc = 0, bc_match = 0; bc < 2 * dim; bc++) {
1374         if (bc_count[bc] == PetscPowInt(2, dim - 1)) {
1375           PetscCall(DMPlexOrientPoint(dm, f, face_orient_dim[dim][bc]));
1376           if (periodicity[bc / 2] == DM_BOUNDARY_PERIODIC) {
1377             PetscInt *put;
1378             if (bc % 2 == 0) { // donor face; no label
1379               PetscCall(PetscSegBufferGet(my_donor_faces[bc / 2], 1, &put));
1380               *put = f;
1381             } else { // periodic face
1382               PetscCall(PetscSegBufferGet(per_faces[bc / 2], 1, &put));
1383               *put = f;
1384               ZCode *zput;
1385               PetscCall(PetscSegBufferGet(donor_face_closure[bc / 2], num_fverts, &zput));
1386               for (PetscInt i = 0; i < num_fverts; i++) {
1387                 Ijk loc = ZCodeSplit(vert_z[fverts[i] - vStart]);
1388                 switch (bc / 2) {
1389                 case 0:
1390                   loc.i = 0;
1391                   break;
1392                 case 1:
1393                   loc.j = 0;
1394                   break;
1395                 case 2:
1396                   loc.k = 0;
1397                   break;
1398                 }
1399                 *zput++ = ZEncode(loc);
1400               }
1401             }
1402             continue;
1403           }
1404           PetscAssert(bc_match == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Face matches multiple face sets");
1405           PetscCall(DMLabelSetValue(label, f, face_marker_dim[dim][bc]));
1406           bc_match++;
1407         }
1408       }
1409     }
1410     // Ensure that the Coordinate DM has our new boundary labels
1411     DM cdm;
1412     PetscCall(DMGetCoordinateDM(dm, &cdm));
1413     PetscCall(DMCopyLabels(dm, cdm, PETSC_COPY_VALUES, PETSC_FALSE, DM_COPY_LABELS_FAIL));
1414     if (periodicity[0] == DM_BOUNDARY_PERIODIC || (dim > 1 && periodicity[1] == DM_BOUNDARY_PERIODIC) || (dim > 2 && periodicity[2] == DM_BOUNDARY_PERIODIC)) {
1415       PetscCall(DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(dm, &layout, vert_z, per_faces, lower, upper, periodicity, donor_face_closure, my_donor_faces));
1416     }
1417     for (PetscInt i = 0; i < 3; i++) {
1418       PetscCall(PetscSegBufferDestroy(&per_faces[i]));
1419       PetscCall(PetscSegBufferDestroy(&donor_face_closure[i]));
1420       PetscCall(PetscSegBufferDestroy(&my_donor_faces[i]));
1421     }
1422   }
1423   PetscCall(PetscFree(layout.zstarts));
1424   PetscCall(PetscFree(vert_z));
1425   PetscCall(PetscLogEventEnd(DMPLEX_CreateBoxSFC, dm, 0, 0, 0));
1426   PetscFunctionReturn(PETSC_SUCCESS);
1427 }
1428 
1429 /*@
1430   DMPlexSetIsoperiodicFaceSF - Express periodicity from an existing mesh
1431 
1432   Logically Collective
1433 
1434   Input Parameters:
1435 + dm           - The `DMPLEX` on which to set periodicity
1436 . num_face_sfs - Number of `PetscSF`s in `face_sfs`
1437 - face_sfs     - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.
1438 
1439   Level: advanced
1440 
1441   Note:
1442   One can use `-dm_plex_shape zbox` to use this mode of periodicity, wherein the periodic points are distinct both globally
1443   and locally, but are paired when creating a global dof space.
1444 
1445 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexGetIsoperiodicFaceSF()`
1446 @*/
1447 PetscErrorCode DMPlexSetIsoperiodicFaceSF(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs)
1448 {
1449   DM_Plex *plex = (DM_Plex *)dm->data;
1450 
1451   PetscFunctionBegin;
1452   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1453   if (num_face_sfs) {
1454     PetscAssertPointer(face_sfs, 3);
1455     PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex));
1456   } else PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", NULL));
1457   if (num_face_sfs == plex->periodic.num_face_sfs && (num_face_sfs == 0 || face_sfs == plex->periodic.face_sfs)) PetscFunctionReturn(PETSC_SUCCESS);
1458   PetscCall(DMSetGlobalSection(dm, NULL));
1459 
1460   for (PetscInt i = 0; i < num_face_sfs; i++) PetscCall(PetscObjectReference((PetscObject)face_sfs[i]));
1461   if (plex->periodic.num_face_sfs > 0) {
1462     for (PetscInt i = 0; i < plex->periodic.num_face_sfs; i++) PetscCall(PetscSFDestroy(&plex->periodic.face_sfs[i]));
1463     PetscCall(PetscFree(plex->periodic.face_sfs));
1464   }
1465 
1466   plex->periodic.num_face_sfs = num_face_sfs;
1467   PetscCall(PetscCalloc1(num_face_sfs, &plex->periodic.face_sfs));
1468   for (PetscInt i = 0; i < num_face_sfs; i++) plex->periodic.face_sfs[i] = face_sfs[i];
1469 
1470   DM cdm = dm->coordinates[0].dm; // Can't DMGetCoordinateDM because it automatically creates one
1471   if (cdm) {
1472     PetscCall(DMPlexSetIsoperiodicFaceSF(cdm, num_face_sfs, face_sfs));
1473     if (face_sfs) cdm->periodic.setup = DMPeriodicCoordinateSetUp_Internal;
1474   }
1475   PetscFunctionReturn(PETSC_SUCCESS);
1476 }
1477 
1478 /*@C
1479   DMPlexGetIsoperiodicFaceSF - Obtain periodicity for a mesh
1480 
1481   Logically Collective
1482 
1483   Input Parameter:
1484 . dm - The `DMPLEX` for which to obtain periodic relation
1485 
1486   Output Parameters:
1487 + num_face_sfs - Number of `PetscSF`s in the array
1488 - face_sfs     - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.
1489 
1490   Level: advanced
1491 
1492 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()`
1493 @*/
1494 PetscErrorCode DMPlexGetIsoperiodicFaceSF(DM dm, PetscInt *num_face_sfs, const PetscSF **face_sfs)
1495 {
1496   PetscBool isPlex;
1497 
1498   PetscFunctionBegin;
1499   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1500   PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMPLEX, &isPlex));
1501   if (isPlex) {
1502     DM_Plex *plex = (DM_Plex *)dm->data;
1503     if (face_sfs) *face_sfs = plex->periodic.face_sfs;
1504     if (num_face_sfs) *num_face_sfs = plex->periodic.num_face_sfs;
1505   } else {
1506     if (face_sfs) *face_sfs = NULL;
1507     if (num_face_sfs) *num_face_sfs = 0;
1508   }
1509   PetscFunctionReturn(PETSC_SUCCESS);
1510 }
1511 
1512 /*@C
1513   DMPlexSetIsoperiodicFaceTransform - set geometric transform from donor to periodic points
1514 
1515   Logically Collective
1516 
1517   Input Parameters:
1518 + dm - `DMPLEX` that has been configured with `DMPlexSetIsoperiodicFaceSF()`
1519 . n  - Number of transforms in array
1520 - t  - Array of 4x4 affine transformation basis.
1521 
1522   Level: advanced
1523 
1524   Notes:
1525   Affine transforms are 4x4 matrices in which the leading 3x3 block expresses a rotation (or identity for no rotation),
1526   the last column contains a translation vector, and the bottom row is all zero except the last entry, which must always
1527   be 1. This representation is common in geometric modeling and allows affine transformations to be composed using
1528   simple matrix multiplication.
1529 
1530   Although the interface accepts a general affine transform, only affine translation is supported at present.
1531 
1532   Developer Notes:
1533   This interface should be replaced by making BasisTransform public, expanding it to support affine representations, and
1534   adding GPU implementations to apply the G2L/L2G transforms.
1535 
1536 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()`
1537 @*/
1538 PetscErrorCode DMPlexSetIsoperiodicFaceTransform(DM dm, PetscInt n, const PetscScalar t[])
1539 {
1540   DM_Plex *plex = (DM_Plex *)dm->data;
1541 
1542   PetscFunctionBegin;
1543   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1544   PetscCheck(n == plex->periodic.num_face_sfs, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Number of transforms (%" PetscInt_FMT ") must equal number of isoperiodc face SFs (%" PetscInt_FMT ")", n, plex->periodic.num_face_sfs);
1545 
1546   PetscCall(PetscFree(plex->periodic.transform));
1547   PetscCall(PetscMalloc1(n, &plex->periodic.transform));
1548   for (PetscInt i = 0; i < n; i++) {
1549     for (PetscInt j = 0; j < 4; j++) {
1550       for (PetscInt k = 0; k < 4; k++) {
1551         PetscCheck(j != k || t[i * 16 + j * 4 + k] == 1., PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Rotated transforms not supported");
1552         plex->periodic.transform[i][j][k] = t[i * 16 + j * 4 + k];
1553       }
1554     }
1555   }
1556   PetscFunctionReturn(PETSC_SUCCESS);
1557 }
1558