xref: /petsc/src/dm/impls/plex/plexsfc.c (revision dc97da86830806a16cfaacfab740baa2e54c8135)
1 #include <petsc/private/dmpleximpl.h> /*I   "petscdmplex.h"   I*/
2 #include <petscsf.h>
3 #include <petsc/private/hashset.h>
4 
5 typedef uint64_t ZCode;
6 
7 PETSC_HASH_SET(ZSet, ZCode, PetscHash_UInt64, PetscHashEqual)
8 
9 typedef struct {
10   PetscInt i, j, k;
11 } Ijk;
12 
13 typedef struct {
14   Ijk         eextent;
15   Ijk         vextent;
16   PetscMPIInt comm_size;
17   ZCode      *zstarts;
18 } ZLayout;
19 
20 static unsigned ZCodeSplit1(ZCode z)
21 {
22   z = ((z & 01001001001001001) | ((z >> 2) & 02002002002002002) | ((z >> 4) & 04004004004004004));
23   z = (z | (z >> 6) | (z >> 12)) & 0000000777000000777;
24   z = (z | (z >> 18)) & 0777777;
25   return (unsigned)z;
26 }
27 
28 static ZCode ZEncode1(unsigned t)
29 {
30   ZCode z = t;
31   z       = (z | (z << 18)) & 0777000000777;
32   z       = (z | (z << 6) | (z << 12)) & 07007007007007007;
33   z       = (z | (z << 2) | (z << 4)) & 0111111111111111111;
34   return z;
35 }
36 
37 static Ijk ZCodeSplit(ZCode z)
38 {
39   Ijk c;
40   c.i = ZCodeSplit1(z >> 2);
41   c.j = ZCodeSplit1(z >> 1);
42   c.k = ZCodeSplit1(z >> 0);
43   return c;
44 }
45 
46 static ZCode ZEncode(Ijk c)
47 {
48   ZCode z = (ZEncode1((unsigned int)c.i) << 2) | (ZEncode1((unsigned int)c.j) << 1) | ZEncode1((unsigned int)c.k);
49   return z;
50 }
51 
52 static PetscBool IjkActive(Ijk extent, Ijk l)
53 {
54   if (l.i < extent.i && l.j < extent.j && l.k < extent.k) return PETSC_TRUE;
55   return PETSC_FALSE;
56 }
57 
58 // If z is not the base of an octet (last 3 bits 0), return 0.
59 //
60 // If z is the base of an octet, we recursively grow to the biggest structured octet. This is typically useful when a z
61 // is outside the domain and we wish to skip a (possibly recursively large) octet to find our next interesting point.
62 static ZCode ZStepOct(ZCode z)
63 {
64   if (PetscUnlikely(z == 0)) return 0; // Infinite loop below if z == 0
65   ZCode step = 07;
66   for (; (z & step) == 0; step = (step << 3) | 07) { }
67   return step >> 3;
68 }
69 
70 // Since element/vertex box extents are typically not equal powers of 2, Z codes that lie within the domain are not contiguous.
71 static PetscErrorCode ZLayoutCreate(PetscMPIInt size, const PetscInt eextent[3], const PetscInt vextent[3], ZLayout *layout)
72 {
73   PetscFunctionBegin;
74   layout->eextent.i = eextent[0];
75   layout->eextent.j = eextent[1];
76   layout->eextent.k = eextent[2];
77   layout->vextent.i = vextent[0];
78   layout->vextent.j = vextent[1];
79   layout->vextent.k = vextent[2];
80   layout->comm_size = size;
81   layout->zstarts   = NULL;
82   PetscCall(PetscMalloc1(size + 1, &layout->zstarts));
83 
84   PetscInt total_elems = eextent[0] * eextent[1] * eextent[2];
85   ZCode    z           = 0;
86   layout->zstarts[0]   = 0;
87   // This loop traverses all vertices in the global domain, so is worth making fast. We use ZStepBound
88   for (PetscMPIInt r = 0; r < size; r++) {
89     PetscInt elems_needed = (total_elems / size) + (total_elems % size > r), count;
90     for (count = 0; count < elems_needed; z++) {
91       ZCode skip = ZStepOct(z); // optimistically attempt a longer step
92       for (ZCode s = skip;; s >>= 3) {
93         Ijk trial = ZCodeSplit(z + s);
94         if (IjkActive(layout->eextent, trial)) {
95           while (count + s + 1 > (ZCode)elems_needed) s >>= 3; // Shrink the octet
96           count += s + 1;
97           z += s;
98           break;
99         }
100         if (s == 0) { // the whole skip octet is inactive
101           z += skip;
102           break;
103         }
104       }
105     }
106     // Pick up any extra vertices in the Z ordering before the next rank's first owned element.
107     //
108     // This leads to poorly balanced vertices when eextent is a power of 2, since all the fringe vertices end up
109     // on the last rank. A possible solution is to balance the Z-order vertices independently from the cells, which will
110     // result in a lot of element closures being remote. We could finish marking boundary conditions, then do a round of
111     // vertex ownership smoothing (which would reorder and redistribute vertices without touching element distribution).
112     // Another would be to have an analytic ownership criteria for vertices in the fringe veextent - eextent. This would
113     // complicate the job of identifying an owner and its offset.
114     //
115     // The current recommended approach is to let `-dm_distribute 1` (default) resolve vertex ownership. This is
116     // *mandatory* with isoperiodicity (except in special cases) to remove standed vertices from local spaces. Here's
117     // the issue:
118     //
119     // Consider this partition on rank 0 (left) and rank 1.
120     //
121     //    4 --------  5 -- 14 --10 -- 21 --11
122     //                |          |          |
123     // 7 -- 16 --  8  |          |          |
124     // |           |  3 -------  7 -------  9
125     // |           |             |          |
126     // 4 --------  6 ------ 10   |          |
127     // |           |         |   6 -- 16 -- 8
128     // |           |         |
129     // 3 ---11---  5 --18--  9
130     //
131     // The periodic face SF looks like
132     // [0] Number of roots=21, leaves=1, remote ranks=1
133     // [0] 16 <- (0,11)
134     // [1] Number of roots=22, leaves=2, remote ranks=2
135     // [1] 14 <- (0,18)
136     // [1] 21 <- (1,16)
137     //
138     // In handling face (0,16), rank 0 learns that (0,7) and (0,8) map to (0,3) and (0,5) respectively, thus we won't use
139     // the point SF links to (1,4) and (1,5). Rank 1 learns about the periodic mapping of (1,5) while handling face
140     // (1,14), but never learns that vertex (1,4) has been mapped to (0,3) by face (0,16).
141     //
142     // We can relatively easily inform vertex (1,4) of this mapping, but it stays in rank 1's local space despite not
143     // being in the closure and thus not being contributed to. This would be mostly harmless except that some viewer
144     // routines expect all local points to be somehow significant. It is not easy to analytically remove the (1,4)
145     // vertex because the point SF and isoperiodic face SF would need to be updated to account for removal of the
146     // stranded vertices.
147     for (; z <= ZEncode(layout->vextent); z++) {
148       Ijk loc = ZCodeSplit(z);
149       if (IjkActive(layout->eextent, loc)) break;
150       z += ZStepOct(z);
151     }
152     layout->zstarts[r + 1] = z;
153   }
154   layout->zstarts[size] = ZEncode(layout->vextent);
155   PetscFunctionReturn(PETSC_SUCCESS);
156 }
157 
158 static PetscInt ZLayoutElementsOnRank(const ZLayout *layout, PetscMPIInt rank)
159 {
160   PetscInt remote_elem = 0;
161   for (ZCode rz = layout->zstarts[rank]; rz < layout->zstarts[rank + 1]; rz++) {
162     Ijk loc = ZCodeSplit(rz);
163     if (IjkActive(layout->eextent, loc)) remote_elem++;
164     else rz += ZStepOct(rz);
165   }
166   return remote_elem;
167 }
168 
169 static PetscInt ZCodeFind(ZCode key, PetscInt n, const ZCode X[])
170 {
171   PetscInt lo = 0, hi = n;
172 
173   if (n == 0) return -1;
174   while (hi - lo > 1) {
175     PetscInt mid = lo + (hi - lo) / 2;
176     if (key < X[mid]) hi = mid;
177     else lo = mid;
178   }
179   return key == X[lo] ? lo : -(lo + (key > X[lo]) + 1);
180 }
181 
182 static PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(DM dm, const ZLayout *layout, const ZCode *vert_z, PetscSegBuffer per_faces[3], const PetscReal *lower, const PetscReal *upper, const DMBoundaryType *periodicity, PetscSegBuffer donor_face_closure[3], PetscSegBuffer my_donor_faces[3])
183 {
184   MPI_Comm    comm;
185   PetscInt    dim, vStart, vEnd;
186   PetscMPIInt size;
187   PetscSF     face_sfs[3];
188   PetscScalar transforms[3][4][4] = {{{0}}};
189 
190   PetscFunctionBegin;
191   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
192   PetscCallMPI(MPI_Comm_size(comm, &size));
193   PetscCall(DMGetDimension(dm, &dim));
194   const PetscInt csize = PetscPowInt(2, dim - 1);
195   PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
196 
197   PetscInt num_directions = 0;
198   for (PetscInt direction = 0; direction < dim; direction++) {
199     PetscCount   num_faces;
200     PetscInt    *faces;
201     ZCode       *donor_verts, *donor_minz;
202     PetscSFNode *leaf;
203     PetscCount   num_multiroots = 0;
204     PetscInt     pStart, pEnd;
205     PetscBool    sorted;
206     PetscInt     inum_faces;
207 
208     if (periodicity[direction] != DM_BOUNDARY_PERIODIC) continue;
209     PetscCall(PetscSegBufferGetSize(per_faces[direction], &num_faces));
210     PetscCall(PetscSegBufferExtractInPlace(per_faces[direction], &faces));
211     PetscCall(PetscSegBufferExtractInPlace(donor_face_closure[direction], &donor_verts));
212     PetscCall(PetscMalloc1(num_faces, &donor_minz));
213     PetscCall(PetscMalloc1(num_faces, &leaf));
214     for (PetscCount i = 0; i < num_faces; i++) {
215       ZCode minz = donor_verts[i * csize];
216 
217       for (PetscInt j = 1; j < csize; j++) minz = PetscMin(minz, donor_verts[i * csize + j]);
218       donor_minz[i] = minz;
219     }
220     PetscCall(PetscIntCast(num_faces, &inum_faces));
221     PetscCall(PetscSortedInt64(inum_faces, (const PetscInt64 *)donor_minz, &sorted));
222     // If a donor vertex were chosen to broker multiple faces, we would have a logic error.
223     // Checking for sorting is a cheap check that there are no duplicates.
224     PetscCheck(sorted, PETSC_COMM_SELF, PETSC_ERR_PLIB, "minz not sorted; possible duplicates not checked");
225     for (PetscCount i = 0; i < num_faces;) {
226       ZCode       z           = donor_minz[i];
227       PetscMPIInt remote_rank = (PetscMPIInt)ZCodeFind(z, size + 1, layout->zstarts), remote_count = 0;
228 
229       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
230       // Process all the vertices on this rank
231       for (ZCode rz = layout->zstarts[remote_rank]; rz < layout->zstarts[remote_rank + 1]; rz++) {
232         Ijk loc = ZCodeSplit(rz);
233 
234         if (rz == z) {
235           leaf[i].rank  = remote_rank;
236           leaf[i].index = remote_count;
237           i++;
238           if (i == num_faces) break;
239           z = donor_minz[i];
240         }
241         if (IjkActive(layout->vextent, loc)) remote_count++;
242       }
243     }
244     PetscCall(PetscFree(donor_minz));
245     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &face_sfs[num_directions]));
246     PetscCall(PetscSFSetGraph(face_sfs[num_directions], vEnd - vStart, inum_faces, NULL, PETSC_USE_POINTER, leaf, PETSC_USE_POINTER));
247     const PetscInt *my_donor_degree;
248     PetscCall(PetscSFComputeDegreeBegin(face_sfs[num_directions], &my_donor_degree));
249     PetscCall(PetscSFComputeDegreeEnd(face_sfs[num_directions], &my_donor_degree));
250 
251     for (PetscInt i = 0; i < vEnd - vStart; i++) {
252       num_multiroots += my_donor_degree[i];
253       if (my_donor_degree[i] == 0) continue;
254       PetscAssert(my_donor_degree[i] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex has multiple faces");
255     }
256     PetscInt  *my_donors, *donor_indices, *my_donor_indices;
257     PetscCount num_my_donors;
258 
259     PetscCall(PetscSegBufferGetSize(my_donor_faces[direction], &num_my_donors));
260     PetscCheck(num_my_donors == num_multiroots, PETSC_COMM_SELF, PETSC_ERR_SUP, "Donor request does not match expected donors");
261     PetscCall(PetscSegBufferExtractInPlace(my_donor_faces[direction], &my_donors));
262     PetscCall(PetscMalloc1(vEnd - vStart, &my_donor_indices));
263     for (PetscCount i = 0; i < num_my_donors; i++) {
264       PetscInt f = my_donors[i];
265       PetscInt num_points, *points = NULL, minv = PETSC_INT_MAX;
266 
267       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
268       for (PetscInt j = 0; j < num_points; j++) {
269         PetscInt p = points[2 * j];
270         if (p < vStart || vEnd <= p) continue;
271         minv = PetscMin(minv, p);
272       }
273       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
274       PetscAssert(my_donor_degree[minv - vStart] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex not requested");
275       my_donor_indices[minv - vStart] = f;
276     }
277     PetscCall(PetscMalloc1(num_faces, &donor_indices));
278     PetscCall(PetscSFBcastBegin(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
279     PetscCall(PetscSFBcastEnd(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
280     PetscCall(PetscFree(my_donor_indices));
281     // Modify our leafs so they point to donor faces instead of donor minz. Additionally, give them indices as faces.
282     for (PetscCount i = 0; i < num_faces; i++) leaf[i].index = donor_indices[i];
283     PetscCall(PetscFree(donor_indices));
284     PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
285     PetscCall(PetscSFSetGraph(face_sfs[num_directions], pEnd - pStart, inum_faces, faces, PETSC_COPY_VALUES, leaf, PETSC_OWN_POINTER));
286     {
287       char face_sf_name[PETSC_MAX_PATH_LEN];
288       PetscCall(PetscSNPrintf(face_sf_name, sizeof face_sf_name, "Z-order Isoperiodic Faces #%" PetscInt_FMT, num_directions));
289       PetscCall(PetscObjectSetName((PetscObject)face_sfs[num_directions], face_sf_name));
290     }
291 
292     transforms[num_directions][0][0]         = 1;
293     transforms[num_directions][1][1]         = 1;
294     transforms[num_directions][2][2]         = 1;
295     transforms[num_directions][3][3]         = 1;
296     transforms[num_directions][direction][3] = upper[direction] - lower[direction];
297     num_directions++;
298   }
299 
300   PetscCall(DMPlexSetIsoperiodicFaceSF(dm, num_directions, face_sfs));
301   PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, num_directions, (PetscScalar *)transforms));
302 
303   for (PetscInt i = 0; i < num_directions; i++) PetscCall(PetscSFDestroy(&face_sfs[i]));
304   PetscFunctionReturn(PETSC_SUCCESS);
305 }
306 
307 // This is a DMGlobalToLocalHook that applies the affine offsets. When extended for rotated periodicity, it'll need to
308 // apply a rotatonal transform and similar operations will be needed for fields (e.g., to rotate a velocity vector).
309 // We use this crude approach here so we don't have to write new GPU kernels yet.
310 static PetscErrorCode DMCoordAddPeriodicOffsets_Private(DM dm, Vec g, InsertMode mode, Vec l, void *ctx)
311 {
312   PetscFunctionBegin;
313   // These `VecScatter`s should be merged to improve efficiency; the scatters cannot be overlapped.
314   for (PetscInt i = 0; i < dm->periodic.num_affines; i++) {
315     PetscCall(VecScatterBegin(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD));
316     PetscCall(VecScatterEnd(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD));
317   }
318   PetscFunctionReturn(PETSC_SUCCESS);
319 }
320 
321 // Start with an SF for a positive depth (e.g., faces) and create a new SF for matched closure. The caller must ensure
322 // that both the donor (root) face and the periodic (leaf) face have consistent orientation, meaning that their closures
323 // are isomorphic. It may be useful/necessary for this restriction to be loosened.
324 //
325 // Output Arguments:
326 //
327 // + closure_sf - augmented point SF (see `DMGetPointSF()`) that includes the faces and all points in its closure. This
328 //   can be used to create a global section and section SF.
329 // - is_points - array of index sets for just the points in the closure of `face_sf`. These may be used to apply an affine
330 //   transformation to periodic dofs; see DMPeriodicCoordinateSetUp_Internal().
331 //
332 static PetscErrorCode DMPlexCreateIsoperiodicPointSF_Private(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs, PetscSF *closure_sf, IS **is_points)
333 {
334   MPI_Comm           comm;
335   PetscMPIInt        rank;
336   PetscSF            point_sf;
337   PetscInt           nroots, nleaves;
338   const PetscInt    *filocal;
339   const PetscSFNode *firemote;
340 
341   PetscFunctionBegin;
342   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
343   PetscCallMPI(MPI_Comm_rank(comm, &rank));
344   PetscCall(DMGetPointSF(dm, &point_sf)); // Point SF has remote points
345   PetscCall(PetscMalloc1(num_face_sfs, is_points));
346 
347   for (PetscInt f = 0; f < num_face_sfs; f++) {
348     PetscSF   face_sf = face_sfs[f];
349     PetscInt *rootdata, *leafdata;
350 
351     PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, &firemote));
352     PetscCall(PetscCalloc2(2 * nroots, &rootdata, 2 * nroots, &leafdata));
353     for (PetscInt i = 0; i < nleaves; i++) {
354       PetscInt point = filocal[i], cl_size, *closure = NULL;
355       PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
356       leafdata[point] = cl_size - 1;
357       PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
358     }
359     PetscCall(PetscSFReduceBegin(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
360     PetscCall(PetscSFReduceEnd(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
361 
362     PetscInt root_offset = 0;
363     for (PetscInt p = 0; p < nroots; p++) {
364       const PetscInt *donor_dof = rootdata + nroots;
365       if (donor_dof[p] == 0) {
366         rootdata[2 * p]     = -1;
367         rootdata[2 * p + 1] = -1;
368         continue;
369       }
370       PetscInt  cl_size;
371       PetscInt *closure = NULL;
372       PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
373       // cl_size - 1 = points not including self
374       PetscAssert(donor_dof[p] == cl_size - 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Reduced leaf cone sizes do not match root cone sizes");
375       rootdata[2 * p]     = root_offset;
376       rootdata[2 * p + 1] = cl_size - 1;
377       root_offset += cl_size - 1;
378       PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
379     }
380     PetscCall(PetscSFBcastBegin(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
381     PetscCall(PetscSFBcastEnd(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
382     // Count how many leaves we need to communicate the closures
383     PetscInt leaf_offset = 0;
384     for (PetscInt i = 0; i < nleaves; i++) {
385       PetscInt point = filocal[i];
386       if (leafdata[2 * point + 1] < 0) continue;
387       leaf_offset += leafdata[2 * point + 1];
388     }
389 
390     PetscSFNode *closure_leaf;
391     PetscCall(PetscMalloc1(leaf_offset, &closure_leaf));
392     leaf_offset = 0;
393     for (PetscInt i = 0; i < nleaves; i++) {
394       PetscInt point   = filocal[i];
395       PetscInt cl_size = leafdata[2 * point + 1];
396       if (cl_size < 0) continue;
397       for (PetscInt j = 0; j < cl_size; j++) {
398         closure_leaf[leaf_offset].rank  = firemote[i].rank;
399         closure_leaf[leaf_offset].index = leafdata[2 * point] + j;
400         leaf_offset++;
401       }
402     }
403 
404     PetscSF sf_closure;
405     PetscCall(PetscSFCreate(comm, &sf_closure));
406     PetscCall(PetscSFSetGraph(sf_closure, root_offset, leaf_offset, NULL, PETSC_USE_POINTER, closure_leaf, PETSC_OWN_POINTER));
407 
408     PetscSFNode *leaf_donor_closure;
409     { // Pack root buffer with owner for every point in the root cones
410       PetscSFNode       *donor_closure;
411       const PetscInt    *pilocal;
412       const PetscSFNode *piremote;
413       PetscInt           npoints;
414 
415       PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, &pilocal, &piremote));
416       PetscCall(PetscCalloc1(root_offset, &donor_closure));
417       root_offset = 0;
418       for (PetscInt p = 0; p < nroots; p++) {
419         if (rootdata[2 * p] < 0) continue;
420         PetscInt  cl_size;
421         PetscInt *closure = NULL;
422         PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
423         for (PetscInt j = 1; j < cl_size; j++) {
424           PetscInt c = closure[2 * j];
425           if (pilocal) {
426             PetscInt found = -1;
427             if (npoints > 0) PetscCall(PetscFindInt(c, npoints, pilocal, &found));
428             if (found >= 0) {
429               donor_closure[root_offset++] = piremote[found];
430               continue;
431             }
432           }
433           // we own c
434           donor_closure[root_offset].rank  = rank;
435           donor_closure[root_offset].index = c;
436           root_offset++;
437         }
438         PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
439       }
440 
441       PetscCall(PetscMalloc1(leaf_offset, &leaf_donor_closure));
442       PetscCall(PetscSFBcastBegin(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE));
443       PetscCall(PetscSFBcastEnd(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE));
444       PetscCall(PetscSFDestroy(&sf_closure));
445       PetscCall(PetscFree(donor_closure));
446     }
447 
448     PetscSFNode *new_iremote;
449     PetscCall(PetscCalloc1(nroots, &new_iremote));
450     for (PetscInt i = 0; i < nroots; i++) new_iremote[i].rank = -1;
451     // Walk leaves and match vertices
452     leaf_offset = 0;
453     for (PetscInt i = 0; i < nleaves; i++) {
454       PetscInt  point   = filocal[i], cl_size;
455       PetscInt *closure = NULL;
456       PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
457       for (PetscInt j = 1; j < cl_size; j++) { // TODO: should we send donor edge orientations so we can flip for consistency?
458         PetscInt    c  = closure[2 * j];
459         PetscSFNode lc = leaf_donor_closure[leaf_offset];
460         // printf("[%d] face %d.%d: %d ?-- (%d,%d)\n", rank, point, j, c, lc.rank, lc.index);
461         if (new_iremote[c].rank == -1) {
462           new_iremote[c] = lc;
463         } else PetscCheck(new_iremote[c].rank == lc.rank && new_iremote[c].index == lc.index, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Mismatched cone ordering between faces");
464         leaf_offset++;
465       }
466       PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
467     }
468     PetscCall(PetscFree(leaf_donor_closure));
469 
470     // Include face points in closure SF
471     for (PetscInt i = 0; i < nleaves; i++) new_iremote[filocal[i]] = firemote[i];
472     // consolidate leaves
473     PetscInt num_new_leaves = 0;
474     for (PetscInt i = 0; i < nroots; i++) {
475       if (new_iremote[i].rank == -1) continue;
476       new_iremote[num_new_leaves] = new_iremote[i];
477       leafdata[num_new_leaves]    = i;
478       num_new_leaves++;
479     }
480     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, num_new_leaves, leafdata, PETSC_COPY_VALUES, &(*is_points)[f]));
481 
482     PetscSF csf;
483     PetscCall(PetscSFCreate(comm, &csf));
484     PetscCall(PetscSFSetGraph(csf, nroots, num_new_leaves, leafdata, PETSC_COPY_VALUES, new_iremote, PETSC_COPY_VALUES));
485     PetscCall(PetscFree(new_iremote)); // copy and delete because new_iremote is longer than it needs to be
486     PetscCall(PetscFree2(rootdata, leafdata));
487 
488     PetscInt npoints;
489     PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, NULL, NULL));
490     if (npoints < 0) { // empty point_sf
491       *closure_sf = csf;
492     } else {
493       PetscCall(PetscSFMerge(point_sf, csf, closure_sf));
494       PetscCall(PetscSFDestroy(&csf));
495     }
496     if (f > 0) PetscCall(PetscSFDestroy(&point_sf)); // Only destroy if point_sf is from previous calls to PetscSFMerge
497     point_sf = *closure_sf;                          // Use combined point + isoperiodic SF to define point ownership for further face_sf
498   }
499   PetscCall(PetscObjectSetName((PetscObject)*closure_sf, "Composed Periodic Points"));
500   PetscFunctionReturn(PETSC_SUCCESS);
501 }
502 
503 static PetscErrorCode DMGetIsoperiodicPointSF_Plex(DM dm, PetscSF *sf)
504 {
505   DM_Plex *plex = (DM_Plex *)dm->data;
506 
507   PetscFunctionBegin;
508   if (!plex->periodic.composed_sf) PetscCall(DMPlexCreateIsoperiodicPointSF_Private(dm, plex->periodic.num_face_sfs, plex->periodic.face_sfs, &plex->periodic.composed_sf, &plex->periodic.periodic_points));
509   if (sf) *sf = plex->periodic.composed_sf;
510   PetscFunctionReturn(PETSC_SUCCESS);
511 }
512 
513 PetscErrorCode DMPlexMigrateIsoperiodicFaceSF_Internal(DM old_dm, DM dm, PetscSF sf_migration)
514 {
515   DM_Plex    *plex = (DM_Plex *)old_dm->data;
516   PetscSF     sf_point, *new_face_sfs;
517   PetscMPIInt rank;
518 
519   PetscFunctionBegin;
520   if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
521   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
522   PetscCall(DMGetPointSF(dm, &sf_point));
523   PetscCall(PetscMalloc1(plex->periodic.num_face_sfs, &new_face_sfs));
524 
525   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) {
526     PetscInt           old_npoints, new_npoints, old_nleaf, new_nleaf, point_nleaf;
527     PetscSFNode       *new_leafdata, *rootdata, *leafdata;
528     const PetscInt    *old_local, *point_local;
529     const PetscSFNode *old_remote, *point_remote;
530 
531     PetscCall(PetscSFGetGraph(plex->periodic.face_sfs[f], &old_npoints, &old_nleaf, &old_local, &old_remote));
532     PetscCall(PetscSFGetGraph(sf_migration, NULL, &new_nleaf, NULL, NULL));
533     PetscCall(PetscSFGetGraph(sf_point, &new_npoints, &point_nleaf, &point_local, &point_remote));
534     PetscAssert(new_nleaf == new_npoints, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected migration leaf space to match new point root space");
535     PetscCall(PetscMalloc3(old_npoints, &rootdata, old_npoints, &leafdata, new_npoints, &new_leafdata));
536 
537     // Fill new_leafdata with new owners of all points
538     for (PetscInt i = 0; i < new_npoints; i++) {
539       new_leafdata[i].rank  = rank;
540       new_leafdata[i].index = i;
541     }
542     for (PetscInt i = 0; i < point_nleaf; i++) {
543       PetscInt j      = point_local[i];
544       new_leafdata[j] = point_remote[i];
545     }
546     // REPLACE is okay because every leaf agrees about the new owners
547     PetscCall(PetscSFReduceBegin(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE));
548     PetscCall(PetscSFReduceEnd(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE));
549     // rootdata now contains the new owners
550 
551     // Send to leaves of old space
552     for (PetscInt i = 0; i < old_npoints; i++) {
553       leafdata[i].rank  = -1;
554       leafdata[i].index = -1;
555     }
556     PetscCall(PetscSFBcastBegin(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE));
557     PetscCall(PetscSFBcastEnd(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE));
558 
559     // Send to new leaf space
560     PetscCall(PetscSFBcastBegin(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE));
561     PetscCall(PetscSFBcastEnd(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE));
562 
563     PetscInt     nface = 0, *new_local;
564     PetscSFNode *new_remote;
565     for (PetscInt i = 0; i < new_npoints; i++) nface += (new_leafdata[i].rank >= 0);
566     PetscCall(PetscMalloc1(nface, &new_local));
567     PetscCall(PetscMalloc1(nface, &new_remote));
568     nface = 0;
569     for (PetscInt i = 0; i < new_npoints; i++) {
570       if (new_leafdata[i].rank == -1) continue;
571       new_local[nface]  = i;
572       new_remote[nface] = new_leafdata[i];
573       nface++;
574     }
575     PetscCall(PetscFree3(rootdata, leafdata, new_leafdata));
576     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &new_face_sfs[f]));
577     PetscCall(PetscSFSetGraph(new_face_sfs[f], new_npoints, nface, new_local, PETSC_OWN_POINTER, new_remote, PETSC_OWN_POINTER));
578     {
579       char new_face_sf_name[PETSC_MAX_PATH_LEN];
580       PetscCall(PetscSNPrintf(new_face_sf_name, sizeof new_face_sf_name, "Migrated Isoperiodic Faces #%" PetscInt_FMT, f));
581       PetscCall(PetscObjectSetName((PetscObject)new_face_sfs[f], new_face_sf_name));
582     }
583   }
584 
585   PetscCall(DMPlexSetIsoperiodicFaceSF(dm, plex->periodic.num_face_sfs, new_face_sfs));
586   PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, plex->periodic.num_face_sfs, (PetscScalar *)plex->periodic.transform));
587   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) PetscCall(PetscSFDestroy(&new_face_sfs[f]));
588   PetscCall(PetscFree(new_face_sfs));
589   PetscFunctionReturn(PETSC_SUCCESS);
590 }
591 
592 PetscErrorCode DMPeriodicCoordinateSetUp_Internal(DM dm)
593 {
594   DM_Plex   *plex = (DM_Plex *)dm->data;
595   PetscCount count;
596   IS         isdof;
597   PetscInt   dim;
598 
599   PetscFunctionBegin;
600   if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
601   PetscCall(DMGetIsoperiodicPointSF_Plex(dm, NULL));
602   PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex));
603 
604   PetscCall(DMGetDimension(dm, &dim));
605   dm->periodic.num_affines = plex->periodic.num_face_sfs;
606   PetscCall(PetscMalloc2(dm->periodic.num_affines, &dm->periodic.affine_to_local, dm->periodic.num_affines, &dm->periodic.affine));
607 
608   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) {
609     PetscInt        npoints, vsize, isize;
610     const PetscInt *points;
611     IS              is = plex->periodic.periodic_points[f];
612     PetscSegBuffer  seg;
613     PetscSection    section;
614     PetscInt       *ind;
615     Vec             L, P;
616     VecType         vec_type;
617     VecScatter      scatter;
618     PetscScalar    *x;
619 
620     PetscCall(DMGetLocalSection(dm, &section));
621     PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 32, &seg));
622     PetscCall(ISGetSize(is, &npoints));
623     PetscCall(ISGetIndices(is, &points));
624     for (PetscInt i = 0; i < npoints; i++) {
625       PetscInt point = points[i], off, dof;
626 
627       PetscCall(PetscSectionGetOffset(section, point, &off));
628       PetscCall(PetscSectionGetDof(section, point, &dof));
629       PetscAssert(dof % dim == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected dof %" PetscInt_FMT " not divisible by dimension %" PetscInt_FMT, dof, dim);
630       for (PetscInt j = 0; j < dof / dim; j++) {
631         PetscInt *slot;
632 
633         PetscCall(PetscSegBufferGetInts(seg, 1, &slot));
634         *slot = off / dim + j;
635       }
636     }
637     PetscCall(PetscSegBufferGetSize(seg, &count));
638     PetscCall(PetscSegBufferExtractAlloc(seg, &ind));
639     PetscCall(PetscSegBufferDestroy(&seg));
640     PetscCall(PetscIntCast(count, &isize));
641     PetscCall(ISCreateBlock(PETSC_COMM_SELF, dim, isize, ind, PETSC_OWN_POINTER, &isdof));
642 
643     PetscCall(PetscIntCast(count * dim, &vsize));
644     PetscCall(DMGetLocalVector(dm, &L));
645     PetscCall(VecCreate(PETSC_COMM_SELF, &P));
646     PetscCall(VecSetSizes(P, vsize, vsize));
647     PetscCall(VecGetType(L, &vec_type));
648     PetscCall(VecSetType(P, vec_type));
649     PetscCall(VecScatterCreate(P, NULL, L, isdof, &scatter));
650     PetscCall(DMRestoreLocalVector(dm, &L));
651     PetscCall(ISDestroy(&isdof));
652 
653     PetscCall(VecGetArrayWrite(P, &x));
654     for (PetscCount i = 0; i < count; i++) {
655       for (PetscInt j = 0; j < dim; j++) x[i * dim + j] = plex->periodic.transform[f][j][3];
656     }
657     PetscCall(VecRestoreArrayWrite(P, &x));
658 
659     dm->periodic.affine_to_local[f] = scatter;
660     dm->periodic.affine[f]          = P;
661   }
662   PetscCall(DMGlobalToLocalHookAdd(dm, NULL, DMCoordAddPeriodicOffsets_Private, NULL));
663   PetscFunctionReturn(PETSC_SUCCESS);
664 }
665 
666 // We'll just orient all the edges, though only periodic boundary edges need orientation
667 static PetscErrorCode DMPlexOrientPositiveEdges_Private(DM dm)
668 {
669   PetscInt dim, eStart, eEnd;
670 
671   PetscFunctionBegin;
672   PetscCall(DMGetDimension(dm, &dim));
673   if (dim < 3) PetscFunctionReturn(PETSC_SUCCESS); // not necessary
674   PetscCall(DMPlexGetDepthStratum(dm, 1, &eStart, &eEnd));
675   for (PetscInt e = eStart; e < eEnd; e++) {
676     const PetscInt *cone;
677     PetscCall(DMPlexGetCone(dm, e, &cone));
678     if (cone[0] > cone[1]) PetscCall(DMPlexOrientPoint(dm, e, -1));
679   }
680   PetscFunctionReturn(PETSC_SUCCESS);
681 }
682 
683 PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Internal(DM dm, PetscInt dim, const PetscInt faces[], const PetscReal lower[], const PetscReal upper[], const DMBoundaryType periodicity[], PetscBool interpolate)
684 {
685   PetscInt  eextent[3] = {1, 1, 1}, vextent[3] = {1, 1, 1};
686   const Ijk closure_1[] = {
687     {0, 0, 0},
688     {1, 0, 0},
689   };
690   const Ijk closure_2[] = {
691     {0, 0, 0},
692     {1, 0, 0},
693     {1, 1, 0},
694     {0, 1, 0},
695   };
696   const Ijk closure_3[] = {
697     {0, 0, 0},
698     {0, 1, 0},
699     {1, 1, 0},
700     {1, 0, 0},
701     {0, 0, 1},
702     {1, 0, 1},
703     {1, 1, 1},
704     {0, 1, 1},
705   };
706   const Ijk *const closure_dim[] = {NULL, closure_1, closure_2, closure_3};
707   // This must be kept consistent with DMPlexCreateCubeMesh_Internal
708   const PetscInt        face_marker_1[]   = {1, 2};
709   const PetscInt        face_marker_2[]   = {4, 2, 1, 3};
710   const PetscInt        face_marker_3[]   = {6, 5, 3, 4, 1, 2};
711   const PetscInt *const face_marker_dim[] = {NULL, face_marker_1, face_marker_2, face_marker_3};
712   // Orient faces so the normal is in the positive axis and the first vertex is the one closest to zero.
713   // These orientations can be determined by examining cones of a reference quad and hex element.
714   const PetscInt        face_orient_1[]   = {0, 0};
715   const PetscInt        face_orient_2[]   = {-1, 0, 0, -1};
716   const PetscInt        face_orient_3[]   = {-2, 0, -2, 1, -2, 0};
717   const PetscInt *const face_orient_dim[] = {NULL, face_orient_1, face_orient_2, face_orient_3};
718 
719   PetscFunctionBegin;
720   PetscAssertPointer(dm, 1);
721   PetscValidLogicalCollectiveInt(dm, dim, 2);
722   PetscCall(DMSetDimension(dm, dim));
723   PetscMPIInt rank, size;
724   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size));
725   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
726   for (PetscInt i = 0; i < dim; i++) {
727     eextent[i] = faces[i];
728     vextent[i] = faces[i] + 1;
729   }
730   ZLayout layout;
731   PetscCall(ZLayoutCreate(size, eextent, vextent, &layout));
732   PetscZSet vset; // set of all vertices in the closure of the owned elements
733   PetscCall(PetscZSetCreate(&vset));
734   PetscInt local_elems = 0;
735   for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
736     Ijk loc = ZCodeSplit(z);
737     if (IjkActive(layout.vextent, loc)) PetscCall(PetscZSetAdd(vset, z));
738     else {
739       z += ZStepOct(z);
740       continue;
741     }
742     if (IjkActive(layout.eextent, loc)) {
743       local_elems++;
744       // Add all neighboring vertices to set
745       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
746         Ijk   inc  = closure_dim[dim][n];
747         Ijk   nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
748         ZCode v    = ZEncode(nloc);
749         PetscCall(PetscZSetAdd(vset, v));
750       }
751     }
752   }
753   PetscInt local_verts, off = 0;
754   ZCode   *vert_z;
755   PetscCall(PetscZSetGetSize(vset, &local_verts));
756   PetscCall(PetscMalloc1(local_verts, &vert_z));
757   PetscCall(PetscZSetGetElems(vset, &off, vert_z));
758   PetscCall(PetscZSetDestroy(&vset));
759   // ZCode is unsigned for bitwise convenience, but highest bit should never be set, so can interpret as signed
760   PetscCall(PetscSortInt64(local_verts, (PetscInt64 *)vert_z));
761 
762   PetscCall(DMPlexSetChart(dm, 0, local_elems + local_verts));
763   for (PetscInt e = 0; e < local_elems; e++) PetscCall(DMPlexSetConeSize(dm, e, PetscPowInt(2, dim)));
764   PetscCall(DMSetUp(dm));
765   {
766     PetscInt e = 0;
767     for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
768       Ijk loc = ZCodeSplit(z);
769       if (!IjkActive(layout.eextent, loc)) {
770         z += ZStepOct(z);
771         continue;
772       }
773       PetscInt cone[8], orient[8] = {0};
774       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
775         Ijk      inc  = closure_dim[dim][n];
776         Ijk      nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
777         ZCode    v    = ZEncode(nloc);
778         PetscInt ci   = ZCodeFind(v, local_verts, vert_z);
779         PetscAssert(ci >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Did not find neighbor vertex in set");
780         cone[n] = local_elems + ci;
781       }
782       PetscCall(DMPlexSetCone(dm, e, cone));
783       PetscCall(DMPlexSetConeOrientation(dm, e, orient));
784       e++;
785     }
786   }
787 
788   PetscCall(DMPlexSymmetrize(dm));
789   PetscCall(DMPlexStratify(dm));
790 
791   { // Create point SF
792     PetscSF sf;
793     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &sf));
794     PetscInt owned_verts = ZCodeFind(layout.zstarts[rank + 1], local_verts, vert_z);
795     if (owned_verts < 0) owned_verts = -(owned_verts + 1); // We don't care whether the key was found
796     PetscInt     num_ghosts = local_verts - owned_verts;   // Due to sorting, owned vertices always come first
797     PetscInt    *local_ghosts;
798     PetscSFNode *ghosts;
799     PetscCall(PetscMalloc1(num_ghosts, &local_ghosts));
800     PetscCall(PetscMalloc1(num_ghosts, &ghosts));
801     for (PetscInt i = 0; i < num_ghosts;) {
802       ZCode       z           = vert_z[owned_verts + i];
803       PetscMPIInt remote_rank = (PetscMPIInt)ZCodeFind(z, size + 1, layout.zstarts), remote_count = 0;
804       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
805       // We have a new remote rank; find all the ghost indices (which are contiguous in vert_z)
806 
807       // Count the elements on remote_rank
808       PetscInt remote_elem = ZLayoutElementsOnRank(&layout, remote_rank);
809 
810       // Traverse vertices and make ghost links
811       for (ZCode rz = layout.zstarts[remote_rank]; rz < layout.zstarts[remote_rank + 1]; rz++) {
812         Ijk loc = ZCodeSplit(rz);
813         if (rz == z) {
814           local_ghosts[i] = local_elems + owned_verts + i;
815           ghosts[i].rank  = remote_rank;
816           ghosts[i].index = remote_elem + remote_count;
817           i++;
818           if (i == num_ghosts) break;
819           z = vert_z[owned_verts + i];
820         }
821         if (IjkActive(layout.vextent, loc)) remote_count++;
822         else rz += ZStepOct(rz);
823       }
824     }
825     PetscCall(PetscSFSetGraph(sf, local_elems + local_verts, num_ghosts, local_ghosts, PETSC_OWN_POINTER, ghosts, PETSC_OWN_POINTER));
826     PetscCall(PetscObjectSetName((PetscObject)sf, "SFC Point SF"));
827     PetscCall(DMSetPointSF(dm, sf));
828     PetscCall(PetscSFDestroy(&sf));
829   }
830   {
831     Vec          coordinates;
832     PetscScalar *coords;
833     PetscSection coord_section;
834     PetscInt     coord_size;
835     PetscCall(DMGetCoordinateSection(dm, &coord_section));
836     PetscCall(PetscSectionSetNumFields(coord_section, 1));
837     PetscCall(PetscSectionSetFieldComponents(coord_section, 0, dim));
838     PetscCall(PetscSectionSetChart(coord_section, local_elems, local_elems + local_verts));
839     for (PetscInt v = 0; v < local_verts; v++) {
840       PetscInt point = local_elems + v;
841       PetscCall(PetscSectionSetDof(coord_section, point, dim));
842       PetscCall(PetscSectionSetFieldDof(coord_section, point, 0, dim));
843     }
844     PetscCall(PetscSectionSetUp(coord_section));
845     PetscCall(PetscSectionGetStorageSize(coord_section, &coord_size));
846     PetscCall(VecCreate(PETSC_COMM_SELF, &coordinates));
847     PetscCall(PetscObjectSetName((PetscObject)coordinates, "coordinates"));
848     PetscCall(VecSetSizes(coordinates, coord_size, PETSC_DETERMINE));
849     PetscCall(VecSetBlockSize(coordinates, dim));
850     PetscCall(VecSetType(coordinates, VECSTANDARD));
851     PetscCall(VecGetArray(coordinates, &coords));
852     for (PetscInt v = 0; v < local_verts; v++) {
853       Ijk loc             = ZCodeSplit(vert_z[v]);
854       coords[v * dim + 0] = lower[0] + loc.i * (upper[0] - lower[0]) / layout.eextent.i;
855       if (dim > 1) coords[v * dim + 1] = lower[1] + loc.j * (upper[1] - lower[1]) / layout.eextent.j;
856       if (dim > 2) coords[v * dim + 2] = lower[2] + loc.k * (upper[2] - lower[2]) / layout.eextent.k;
857     }
858     PetscCall(VecRestoreArray(coordinates, &coords));
859     PetscCall(DMSetCoordinatesLocal(dm, coordinates));
860     PetscCall(VecDestroy(&coordinates));
861   }
862   if (interpolate) {
863     PetscCall(DMPlexInterpolateInPlace_Internal(dm));
864     // It's currently necessary to orient the donor and periodic edges consistently. An easy way to ensure that is ot
865     // give all edges positive orientation. Since vertices are created in Z-order, all ranks will agree about the
866     // ordering cone[0] < cone[1]. This is overkill and it would be nice to remove this preparation and make
867     // DMPlexCreateIsoperiodicClosureSF_Private() more resilient, so it fixes any inconsistent orientations. That might
868     // be needed in a general CGNS reader, for example.
869     PetscCall(DMPlexOrientPositiveEdges_Private(dm));
870 
871     DMLabel label;
872     PetscCall(DMCreateLabel(dm, "Face Sets"));
873     PetscCall(DMGetLabel(dm, "Face Sets", &label));
874     PetscSegBuffer per_faces[3], donor_face_closure[3], my_donor_faces[3];
875     for (PetscInt i = 0; i < 3; i++) {
876       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &per_faces[i]));
877       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &my_donor_faces[i]));
878       PetscCall(PetscSegBufferCreate(sizeof(ZCode), 64 * PetscPowInt(2, dim), &donor_face_closure[i]));
879     }
880     PetscInt fStart, fEnd, vStart, vEnd;
881     PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
882     PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
883     for (PetscInt f = fStart; f < fEnd; f++) {
884       PetscInt npoints, *points = NULL, num_fverts = 0, fverts[8];
885       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
886       PetscInt bc_count[6] = {0};
887       for (PetscInt i = 0; i < npoints; i++) {
888         PetscInt p = points[2 * i];
889         if (p < vStart || vEnd <= p) continue;
890         fverts[num_fverts++] = p;
891         Ijk loc              = ZCodeSplit(vert_z[p - vStart]);
892         // Convention here matches DMPlexCreateCubeMesh_Internal
893         bc_count[0] += loc.i == 0;
894         bc_count[1] += loc.i == layout.vextent.i - 1;
895         bc_count[2] += loc.j == 0;
896         bc_count[3] += loc.j == layout.vextent.j - 1;
897         bc_count[4] += loc.k == 0;
898         bc_count[5] += loc.k == layout.vextent.k - 1;
899       }
900       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
901       for (PetscInt bc = 0, bc_match = 0; bc < 2 * dim; bc++) {
902         if (bc_count[bc] == PetscPowInt(2, dim - 1)) {
903           PetscCall(DMPlexOrientPoint(dm, f, face_orient_dim[dim][bc]));
904           if (periodicity[bc / 2] == DM_BOUNDARY_PERIODIC) {
905             PetscInt *put;
906             if (bc % 2 == 0) { // donor face; no label
907               PetscCall(PetscSegBufferGet(my_donor_faces[bc / 2], 1, &put));
908               *put = f;
909             } else { // periodic face
910               PetscCall(PetscSegBufferGet(per_faces[bc / 2], 1, &put));
911               *put = f;
912               ZCode *zput;
913               PetscCall(PetscSegBufferGet(donor_face_closure[bc / 2], num_fverts, &zput));
914               for (PetscInt i = 0; i < num_fverts; i++) {
915                 Ijk loc = ZCodeSplit(vert_z[fverts[i] - vStart]);
916                 switch (bc / 2) {
917                 case 0:
918                   loc.i = 0;
919                   break;
920                 case 1:
921                   loc.j = 0;
922                   break;
923                 case 2:
924                   loc.k = 0;
925                   break;
926                 }
927                 *zput++ = ZEncode(loc);
928               }
929             }
930             continue;
931           }
932           PetscAssert(bc_match == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Face matches multiple face sets");
933           PetscCall(DMLabelSetValue(label, f, face_marker_dim[dim][bc]));
934           bc_match++;
935         }
936       }
937     }
938     // Ensure that the Coordinate DM has our new boundary labels
939     DM cdm;
940     PetscCall(DMGetCoordinateDM(dm, &cdm));
941     PetscCall(DMCopyLabels(dm, cdm, PETSC_COPY_VALUES, PETSC_FALSE, DM_COPY_LABELS_FAIL));
942     if (periodicity[0] == DM_BOUNDARY_PERIODIC || (dim > 1 && periodicity[1] == DM_BOUNDARY_PERIODIC) || (dim > 2 && periodicity[2] == DM_BOUNDARY_PERIODIC)) {
943       PetscCall(DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(dm, &layout, vert_z, per_faces, lower, upper, periodicity, donor_face_closure, my_donor_faces));
944     }
945     for (PetscInt i = 0; i < 3; i++) {
946       PetscCall(PetscSegBufferDestroy(&per_faces[i]));
947       PetscCall(PetscSegBufferDestroy(&donor_face_closure[i]));
948       PetscCall(PetscSegBufferDestroy(&my_donor_faces[i]));
949     }
950   }
951   PetscCall(PetscFree(layout.zstarts));
952   PetscCall(PetscFree(vert_z));
953   PetscFunctionReturn(PETSC_SUCCESS);
954 }
955 
956 /*@
957   DMPlexSetIsoperiodicFaceSF - Express periodicity from an existing mesh
958 
959   Logically Collective
960 
961   Input Parameters:
962 + dm           - The `DMPLEX` on which to set periodicity
963 . num_face_sfs - Number of `PetscSF`s in `face_sfs`
964 - face_sfs     - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.
965 
966   Level: advanced
967 
968   Note:
969   One can use `-dm_plex_shape zbox` to use this mode of periodicity, wherein the periodic points are distinct both globally
970   and locally, but are paired when creating a global dof space.
971 
972 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexGetIsoperiodicFaceSF()`
973 @*/
974 PetscErrorCode DMPlexSetIsoperiodicFaceSF(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs)
975 {
976   DM_Plex *plex = (DM_Plex *)dm->data;
977 
978   PetscFunctionBegin;
979   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
980   if (face_sfs) PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex));
981   if (face_sfs == plex->periodic.face_sfs && num_face_sfs == plex->periodic.num_face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
982 
983   for (PetscInt i = 0; i < num_face_sfs; i++) PetscCall(PetscObjectReference((PetscObject)face_sfs[i]));
984 
985   if (plex->periodic.num_face_sfs > 0) {
986     for (PetscInt i = 0; i < plex->periodic.num_face_sfs; i++) PetscCall(PetscSFDestroy(&plex->periodic.face_sfs[i]));
987     PetscCall(PetscFree(plex->periodic.face_sfs));
988   }
989 
990   plex->periodic.num_face_sfs = num_face_sfs;
991   PetscCall(PetscCalloc1(num_face_sfs, &plex->periodic.face_sfs));
992   for (PetscInt i = 0; i < num_face_sfs; i++) plex->periodic.face_sfs[i] = face_sfs[i];
993 
994   DM cdm = dm->coordinates[0].dm; // Can't DMGetCoordinateDM because it automatically creates one
995   if (cdm) {
996     PetscCall(DMPlexSetIsoperiodicFaceSF(cdm, num_face_sfs, face_sfs));
997     if (face_sfs) cdm->periodic.setup = DMPeriodicCoordinateSetUp_Internal;
998   }
999   PetscFunctionReturn(PETSC_SUCCESS);
1000 }
1001 
1002 /*@C
1003   DMPlexGetIsoperiodicFaceSF - Obtain periodicity for a mesh
1004 
1005   Logically Collective
1006 
1007   Input Parameter:
1008 . dm - The `DMPLEX` for which to obtain periodic relation
1009 
1010   Output Parameters:
1011 + num_face_sfs - Number of `PetscSF`s in the array
1012 - face_sfs     - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.
1013 
1014   Level: advanced
1015 
1016 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()`
1017 @*/
1018 PetscErrorCode DMPlexGetIsoperiodicFaceSF(DM dm, PetscInt *num_face_sfs, const PetscSF **face_sfs)
1019 {
1020   DM_Plex *plex = (DM_Plex *)dm->data;
1021 
1022   PetscFunctionBegin;
1023   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1024   *face_sfs     = plex->periodic.face_sfs;
1025   *num_face_sfs = plex->periodic.num_face_sfs;
1026   PetscFunctionReturn(PETSC_SUCCESS);
1027 }
1028 
1029 /*@C
1030   DMPlexSetIsoperiodicFaceTransform - set geometric transform from donor to periodic points
1031 
1032   Logically Collective
1033 
1034   Input Parameters:
1035 + dm - `DMPLEX` that has been configured with `DMPlexSetIsoperiodicFaceSF()`
1036 . n  - Number of transforms in array
1037 - t  - Array of 4x4 affine transformation basis.
1038 
1039   Level: advanced
1040 
1041   Notes:
1042   Affine transforms are 4x4 matrices in which the leading 3x3 block expresses a rotation (or identity for no rotation),
1043   the last column contains a translation vector, and the bottom row is all zero except the last entry, which must always
1044   be 1. This representation is common in geometric modeling and allows affine transformations to be composed using
1045   simple matrix multiplication.
1046 
1047   Although the interface accepts a general affine transform, only affine translation is supported at present.
1048 
1049   Developer Notes:
1050   This interface should be replaced by making BasisTransform public, expanding it to support affine representations, and
1051   adding GPU implementations to apply the G2L/L2G transforms.
1052 
1053 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()`
1054 @*/
1055 PetscErrorCode DMPlexSetIsoperiodicFaceTransform(DM dm, PetscInt n, const PetscScalar t[])
1056 {
1057   DM_Plex *plex = (DM_Plex *)dm->data;
1058 
1059   PetscFunctionBegin;
1060   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1061   PetscCheck(n == plex->periodic.num_face_sfs, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Number of transforms (%" PetscInt_FMT ") must equal number of isoperiodc face SFs (%" PetscInt_FMT ")", n, plex->periodic.num_face_sfs);
1062 
1063   PetscCall(PetscMalloc1(n, &plex->periodic.transform));
1064   for (PetscInt i = 0; i < n; i++) {
1065     for (PetscInt j = 0; j < 4; j++) {
1066       for (PetscInt k = 0; k < 4; k++) {
1067         PetscCheck(j != k || t[i * 16 + j * 4 + k] == 1., PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Rotated transforms not supported");
1068         plex->periodic.transform[i][j][k] = t[i * 16 + j * 4 + k];
1069       }
1070     }
1071   }
1072   PetscFunctionReturn(PETSC_SUCCESS);
1073 }
1074