xref: /petsc/src/dm/impls/plex/plexsfc.c (revision ea7b3863513b7120da5bbc46fbc9df9d66f981c3)
1 #include <petsc/private/dmpleximpl.h> /*I   "petscdmplex.h"   I*/
2 #include <petscsf.h>
3 #include <petsc/private/hashset.h>
4 
5 typedef uint64_t ZCode;
6 
7 PETSC_HASH_SET(ZSet, ZCode, PetscHash_UInt64, PetscHashEqual)
8 
9 typedef struct {
10   PetscInt i, j, k;
11 } Ijk;
12 
13 typedef struct {
14   Ijk         eextent;
15   Ijk         vextent;
16   PetscMPIInt comm_size;
17   ZCode      *zstarts;
18 } ZLayout;
19 
20 // Magic numbers taken from https://stackoverflow.com/a/18528775/7564988
21 static unsigned ZCodeSplit1(ZCode z)
22 {
23   z &= 0x1249249249249249;
24   z = (z | z >> 2) & 0x10c30c30c30c30c3;
25   z = (z | z >> 4) & 0x100f00f00f00f00f;
26   z = (z | z >> 8) & 0x1f0000ff0000ff;
27   z = (z | z >> 16) & 0x1f00000000ffff;
28   z = (z | z >> 32) & 0x1fffff;
29   return (unsigned)z;
30 }
31 
32 static ZCode ZEncode1(unsigned t)
33 {
34   ZCode z = t;
35   z &= 0x1fffff;
36   z = (z | z << 32) & 0x1f00000000ffff;
37   z = (z | z << 16) & 0x1f0000ff0000ff;
38   z = (z | z << 8) & 0x100f00f00f00f00f;
39   z = (z | z << 4) & 0x10c30c30c30c30c3;
40   z = (z | z << 2) & 0x1249249249249249;
41   return z;
42 }
43 
44 static Ijk ZCodeSplit(ZCode z)
45 {
46   Ijk c;
47   c.i = ZCodeSplit1(z >> 2);
48   c.j = ZCodeSplit1(z >> 1);
49   c.k = ZCodeSplit1(z >> 0);
50   return c;
51 }
52 
53 static ZCode ZEncode(Ijk c)
54 {
55   ZCode z = (ZEncode1((unsigned int)c.i) << 2) | (ZEncode1((unsigned int)c.j) << 1) | ZEncode1((unsigned int)c.k);
56   return z;
57 }
58 
59 static PetscBool IjkActive(Ijk extent, Ijk l)
60 {
61   if (l.i < extent.i && l.j < extent.j && l.k < extent.k) return PETSC_TRUE;
62   return PETSC_FALSE;
63 }
64 
65 // If z is not the base of an octet (last 3 bits 0), return 0.
66 //
67 // If z is the base of an octet, we recursively grow to the biggest structured octet. This is typically useful when a z
68 // is outside the domain and we wish to skip a (possibly recursively large) octet to find our next interesting point.
69 static ZCode ZStepOct(ZCode z)
70 {
71   if (PetscUnlikely(z == 0)) return 0; // Infinite loop below if z == 0
72   ZCode step = 07;
73   for (; (z & step) == 0; step = (step << 3) | 07) { }
74   return step >> 3;
75 }
76 
77 // Since element/vertex box extents are typically not equal powers of 2, Z codes that lie within the domain are not contiguous.
78 static PetscErrorCode ZLayoutCreate(PetscMPIInt size, const PetscInt eextent[3], const PetscInt vextent[3], ZLayout *layout)
79 {
80   PetscFunctionBegin;
81   layout->eextent.i = eextent[0];
82   layout->eextent.j = eextent[1];
83   layout->eextent.k = eextent[2];
84   layout->vextent.i = vextent[0];
85   layout->vextent.j = vextent[1];
86   layout->vextent.k = vextent[2];
87   layout->comm_size = size;
88   layout->zstarts   = NULL;
89   PetscCall(PetscMalloc1(size + 1, &layout->zstarts));
90 
91   PetscInt total_elems = eextent[0] * eextent[1] * eextent[2];
92   ZCode    z           = 0;
93   layout->zstarts[0]   = 0;
94   // This loop traverses all vertices in the global domain, so is worth making fast. We use ZStepBound
95   for (PetscMPIInt r = 0; r < size; r++) {
96     PetscInt elems_needed = (total_elems / size) + (total_elems % size > r), count;
97     for (count = 0; count < elems_needed; z++) {
98       ZCode skip = ZStepOct(z); // optimistically attempt a longer step
99       for (ZCode s = skip;; s >>= 3) {
100         Ijk trial = ZCodeSplit(z + s);
101         if (IjkActive(layout->eextent, trial)) {
102           while (count + s + 1 > (ZCode)elems_needed) s >>= 3; // Shrink the octet
103           count += s + 1;
104           z += s;
105           break;
106         }
107         if (s == 0) { // the whole skip octet is inactive
108           z += skip;
109           break;
110         }
111       }
112     }
113     // Pick up any extra vertices in the Z ordering before the next rank's first owned element.
114     //
115     // This leads to poorly balanced vertices when eextent is a power of 2, since all the fringe vertices end up
116     // on the last rank. A possible solution is to balance the Z-order vertices independently from the cells, which will
117     // result in a lot of element closures being remote. We could finish marking boundary conditions, then do a round of
118     // vertex ownership smoothing (which would reorder and redistribute vertices without touching element distribution).
119     // Another would be to have an analytic ownership criteria for vertices in the fringe veextent - eextent. This would
120     // complicate the job of identifying an owner and its offset.
121     //
122     // The current recommended approach is to let `-dm_distribute 1` (default) resolve vertex ownership. This is
123     // *mandatory* with isoperiodicity (except in special cases) to remove standed vertices from local spaces. Here's
124     // the issue:
125     //
126     // Consider this partition on rank 0 (left) and rank 1.
127     //
128     //    4 --------  5 -- 14 --10 -- 21 --11
129     //                |          |          |
130     // 7 -- 16 --  8  |          |          |
131     // |           |  3 -------  7 -------  9
132     // |           |             |          |
133     // 4 --------  6 ------ 10   |          |
134     // |           |         |   6 -- 16 -- 8
135     // |           |         |
136     // 3 ---11---  5 --18--  9
137     //
138     // The periodic face SF looks like
139     // [0] Number of roots=21, leaves=1, remote ranks=1
140     // [0] 16 <- (0,11)
141     // [1] Number of roots=22, leaves=2, remote ranks=2
142     // [1] 14 <- (0,18)
143     // [1] 21 <- (1,16)
144     //
145     // In handling face (0,16), rank 0 learns that (0,7) and (0,8) map to (0,3) and (0,5) respectively, thus we won't use
146     // the point SF links to (1,4) and (1,5). Rank 1 learns about the periodic mapping of (1,5) while handling face
147     // (1,14), but never learns that vertex (1,4) has been mapped to (0,3) by face (0,16).
148     //
149     // We can relatively easily inform vertex (1,4) of this mapping, but it stays in rank 1's local space despite not
150     // being in the closure and thus not being contributed to. This would be mostly harmless except that some viewer
151     // routines expect all local points to be somehow significant. It is not easy to analytically remove the (1,4)
152     // vertex because the point SF and isoperiodic face SF would need to be updated to account for removal of the
153     // stranded vertices.
154     for (; z <= ZEncode(layout->vextent); z++) {
155       Ijk loc = ZCodeSplit(z);
156       if (IjkActive(layout->eextent, loc)) break;
157       z += ZStepOct(z);
158     }
159     layout->zstarts[r + 1] = z;
160   }
161   layout->zstarts[size] = ZEncode(layout->vextent);
162   PetscFunctionReturn(PETSC_SUCCESS);
163 }
164 
165 static PetscInt ZLayoutElementsOnRank(const ZLayout *layout, PetscMPIInt rank)
166 {
167   PetscInt remote_elem = 0;
168   for (ZCode rz = layout->zstarts[rank]; rz < layout->zstarts[rank + 1]; rz++) {
169     Ijk loc = ZCodeSplit(rz);
170     if (IjkActive(layout->eextent, loc)) remote_elem++;
171     else rz += ZStepOct(rz);
172   }
173   return remote_elem;
174 }
175 
176 static PetscInt ZCodeFind(ZCode key, PetscInt n, const ZCode X[])
177 {
178   PetscInt lo = 0, hi = n;
179 
180   if (n == 0) return -1;
181   while (hi - lo > 1) {
182     PetscInt mid = lo + (hi - lo) / 2;
183     if (key < X[mid]) hi = mid;
184     else lo = mid;
185   }
186   return key == X[lo] ? lo : -(lo + (key > X[lo]) + 1);
187 }
188 
189 static PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(DM dm, const ZLayout *layout, const ZCode *vert_z, PetscSegBuffer per_faces[3], const PetscReal *lower, const PetscReal *upper, const DMBoundaryType *periodicity, PetscSegBuffer donor_face_closure[3], PetscSegBuffer my_donor_faces[3])
190 {
191   MPI_Comm    comm;
192   PetscInt    dim, vStart, vEnd;
193   PetscMPIInt size;
194   PetscSF     face_sfs[3];
195   PetscScalar transforms[3][4][4] = {{{0}}};
196 
197   PetscFunctionBegin;
198   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
199   PetscCallMPI(MPI_Comm_size(comm, &size));
200   PetscCall(DMGetDimension(dm, &dim));
201   const PetscInt csize = PetscPowInt(2, dim - 1);
202   PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
203 
204   PetscInt num_directions = 0;
205   for (PetscInt direction = 0; direction < dim; direction++) {
206     PetscCount   num_faces;
207     PetscInt    *faces;
208     ZCode       *donor_verts, *donor_minz;
209     PetscSFNode *leaf;
210     PetscCount   num_multiroots = 0;
211     PetscInt     pStart, pEnd;
212     PetscBool    sorted;
213     PetscInt     inum_faces;
214 
215     if (periodicity[direction] != DM_BOUNDARY_PERIODIC) continue;
216     PetscCall(PetscSegBufferGetSize(per_faces[direction], &num_faces));
217     PetscCall(PetscSegBufferExtractInPlace(per_faces[direction], &faces));
218     PetscCall(PetscSegBufferExtractInPlace(donor_face_closure[direction], &donor_verts));
219     PetscCall(PetscMalloc1(num_faces, &donor_minz));
220     PetscCall(PetscMalloc1(num_faces, &leaf));
221     for (PetscCount i = 0; i < num_faces; i++) {
222       ZCode minz = donor_verts[i * csize];
223 
224       for (PetscInt j = 1; j < csize; j++) minz = PetscMin(minz, donor_verts[i * csize + j]);
225       donor_minz[i] = minz;
226     }
227     PetscCall(PetscIntCast(num_faces, &inum_faces));
228     PetscCall(PetscSortedInt64(inum_faces, (const PetscInt64 *)donor_minz, &sorted));
229     // If a donor vertex were chosen to broker multiple faces, we would have a logic error.
230     // Checking for sorting is a cheap check that there are no duplicates.
231     PetscCheck(sorted, PETSC_COMM_SELF, PETSC_ERR_PLIB, "minz not sorted; possible duplicates not checked");
232     for (PetscCount i = 0; i < num_faces;) {
233       ZCode       z           = donor_minz[i];
234       PetscMPIInt remote_rank = (PetscMPIInt)ZCodeFind(z, size + 1, layout->zstarts), remote_count = 0;
235 
236       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
237       // Process all the vertices on this rank
238       for (ZCode rz = layout->zstarts[remote_rank]; rz < layout->zstarts[remote_rank + 1]; rz++) {
239         Ijk loc = ZCodeSplit(rz);
240 
241         if (rz == z) {
242           leaf[i].rank  = remote_rank;
243           leaf[i].index = remote_count;
244           i++;
245           if (i == num_faces) break;
246           z = donor_minz[i];
247         }
248         if (IjkActive(layout->vextent, loc)) remote_count++;
249       }
250     }
251     PetscCall(PetscFree(donor_minz));
252     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &face_sfs[num_directions]));
253     PetscCall(PetscSFSetGraph(face_sfs[num_directions], vEnd - vStart, inum_faces, NULL, PETSC_USE_POINTER, leaf, PETSC_USE_POINTER));
254     const PetscInt *my_donor_degree;
255     PetscCall(PetscSFComputeDegreeBegin(face_sfs[num_directions], &my_donor_degree));
256     PetscCall(PetscSFComputeDegreeEnd(face_sfs[num_directions], &my_donor_degree));
257 
258     for (PetscInt i = 0; i < vEnd - vStart; i++) {
259       num_multiroots += my_donor_degree[i];
260       if (my_donor_degree[i] == 0) continue;
261       PetscAssert(my_donor_degree[i] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex has multiple faces");
262     }
263     PetscInt  *my_donors, *donor_indices, *my_donor_indices;
264     PetscCount num_my_donors;
265 
266     PetscCall(PetscSegBufferGetSize(my_donor_faces[direction], &num_my_donors));
267     PetscCheck(num_my_donors == num_multiroots, PETSC_COMM_SELF, PETSC_ERR_SUP, "Donor request (%" PetscCount_FMT ") does not match expected donors (%" PetscCount_FMT ")", num_my_donors, num_multiroots);
268     PetscCall(PetscSegBufferExtractInPlace(my_donor_faces[direction], &my_donors));
269     PetscCall(PetscMalloc1(vEnd - vStart, &my_donor_indices));
270     for (PetscCount i = 0; i < num_my_donors; i++) {
271       PetscInt f = my_donors[i];
272       PetscInt num_points, *points = NULL, minv = PETSC_INT_MAX;
273 
274       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
275       for (PetscInt j = 0; j < num_points; j++) {
276         PetscInt p = points[2 * j];
277         if (p < vStart || vEnd <= p) continue;
278         minv = PetscMin(minv, p);
279       }
280       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
281       PetscAssert(my_donor_degree[minv - vStart] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex not requested");
282       my_donor_indices[minv - vStart] = f;
283     }
284     PetscCall(PetscMalloc1(num_faces, &donor_indices));
285     PetscCall(PetscSFBcastBegin(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
286     PetscCall(PetscSFBcastEnd(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
287     PetscCall(PetscFree(my_donor_indices));
288     // Modify our leafs so they point to donor faces instead of donor minz. Additionally, give them indices as faces.
289     for (PetscCount i = 0; i < num_faces; i++) leaf[i].index = donor_indices[i];
290     PetscCall(PetscFree(donor_indices));
291     PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
292     PetscCall(PetscSFSetGraph(face_sfs[num_directions], pEnd - pStart, inum_faces, faces, PETSC_COPY_VALUES, leaf, PETSC_OWN_POINTER));
293     {
294       char face_sf_name[PETSC_MAX_PATH_LEN];
295       PetscCall(PetscSNPrintf(face_sf_name, sizeof face_sf_name, "Z-order Isoperiodic Faces #%" PetscInt_FMT, num_directions));
296       PetscCall(PetscObjectSetName((PetscObject)face_sfs[num_directions], face_sf_name));
297     }
298 
299     transforms[num_directions][0][0]         = 1;
300     transforms[num_directions][1][1]         = 1;
301     transforms[num_directions][2][2]         = 1;
302     transforms[num_directions][3][3]         = 1;
303     transforms[num_directions][direction][3] = upper[direction] - lower[direction];
304     num_directions++;
305   }
306 
307   PetscCall(DMPlexSetIsoperiodicFaceSF(dm, num_directions, face_sfs));
308   PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, num_directions, (PetscScalar *)transforms));
309 
310   for (PetscInt i = 0; i < num_directions; i++) PetscCall(PetscSFDestroy(&face_sfs[i]));
311   PetscFunctionReturn(PETSC_SUCCESS);
312 }
313 
314 // This is a DMGlobalToLocalHook that applies the affine offsets. When extended for rotated periodicity, it'll need to
315 // apply a rotatonal transform and similar operations will be needed for fields (e.g., to rotate a velocity vector).
316 // We use this crude approach here so we don't have to write new GPU kernels yet.
317 static PetscErrorCode DMCoordAddPeriodicOffsets_Private(DM dm, Vec g, InsertMode mode, Vec l, void *ctx)
318 {
319   PetscFunctionBegin;
320   // These `VecScatter`s should be merged to improve efficiency; the scatters cannot be overlapped.
321   for (PetscInt i = 0; i < dm->periodic.num_affines; i++) {
322     PetscCall(VecScatterBegin(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD));
323     PetscCall(VecScatterEnd(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD));
324   }
325   PetscFunctionReturn(PETSC_SUCCESS);
326 }
327 
328 // Start with an SF for a positive depth (e.g., faces) and create a new SF for matched closure. The caller must ensure
329 // that both the donor (root) face and the periodic (leaf) face have consistent orientation, meaning that their closures
330 // are isomorphic. It may be useful/necessary for this restriction to be loosened.
331 //
332 // Output Arguments:
333 //
334 // + closure_sf - augmented point SF (see `DMGetPointSF()`) that includes the faces and all points in its closure. This
335 //   can be used to create a global section and section SF.
336 // - is_points - array of index sets for just the points in the closure of `face_sf`. These may be used to apply an affine
337 //   transformation to periodic dofs; see DMPeriodicCoordinateSetUp_Internal().
338 //
339 static PetscErrorCode DMPlexCreateIsoperiodicPointSF_Private(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs, PetscSF *closure_sf, IS **is_points)
340 {
341   MPI_Comm           comm;
342   PetscMPIInt        rank;
343   PetscSF            point_sf;
344   PetscInt           nroots, nleaves;
345   const PetscInt    *filocal;
346   const PetscSFNode *firemote;
347 
348   PetscFunctionBegin;
349   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
350   PetscCallMPI(MPI_Comm_rank(comm, &rank));
351   PetscCall(DMGetPointSF(dm, &point_sf)); // Point SF has remote points
352   PetscCall(PetscMalloc1(num_face_sfs, is_points));
353 
354   for (PetscInt f = 0; f < num_face_sfs; f++) {
355     PetscSF   face_sf = face_sfs[f];
356     PetscInt *rootdata, *leafdata;
357 
358     PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, &firemote));
359     PetscCall(PetscCalloc2(2 * nroots, &rootdata, 2 * nroots, &leafdata));
360     for (PetscInt i = 0; i < nleaves; i++) {
361       PetscInt point = filocal[i], cl_size, *closure = NULL;
362       PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
363       leafdata[point] = cl_size - 1;
364       PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
365     }
366     PetscCall(PetscSFReduceBegin(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
367     PetscCall(PetscSFReduceEnd(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
368 
369     PetscInt root_offset = 0;
370     for (PetscInt p = 0; p < nroots; p++) {
371       const PetscInt *donor_dof = rootdata + nroots;
372       if (donor_dof[p] == 0) {
373         rootdata[2 * p]     = -1;
374         rootdata[2 * p + 1] = -1;
375         continue;
376       }
377       PetscInt  cl_size;
378       PetscInt *closure = NULL;
379       PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
380       // cl_size - 1 = points not including self
381       PetscAssert(donor_dof[p] == cl_size - 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Reduced leaf cone sizes do not match root cone sizes");
382       rootdata[2 * p]     = root_offset;
383       rootdata[2 * p + 1] = cl_size - 1;
384       root_offset += cl_size - 1;
385       PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
386     }
387     PetscCall(PetscSFBcastBegin(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
388     PetscCall(PetscSFBcastEnd(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
389     // Count how many leaves we need to communicate the closures
390     PetscInt leaf_offset = 0;
391     for (PetscInt i = 0; i < nleaves; i++) {
392       PetscInt point = filocal[i];
393       if (leafdata[2 * point + 1] < 0) continue;
394       leaf_offset += leafdata[2 * point + 1];
395     }
396 
397     PetscSFNode *closure_leaf;
398     PetscCall(PetscMalloc1(leaf_offset, &closure_leaf));
399     leaf_offset = 0;
400     for (PetscInt i = 0; i < nleaves; i++) {
401       PetscInt point   = filocal[i];
402       PetscInt cl_size = leafdata[2 * point + 1];
403       if (cl_size < 0) continue;
404       for (PetscInt j = 0; j < cl_size; j++) {
405         closure_leaf[leaf_offset].rank  = firemote[i].rank;
406         closure_leaf[leaf_offset].index = leafdata[2 * point] + j;
407         leaf_offset++;
408       }
409     }
410 
411     PetscSF sf_closure;
412     PetscCall(PetscSFCreate(comm, &sf_closure));
413     PetscCall(PetscSFSetGraph(sf_closure, root_offset, leaf_offset, NULL, PETSC_USE_POINTER, closure_leaf, PETSC_OWN_POINTER));
414 
415     PetscSFNode *leaf_donor_closure;
416     { // Pack root buffer with owner for every point in the root cones
417       PetscSFNode       *donor_closure;
418       const PetscInt    *pilocal;
419       const PetscSFNode *piremote;
420       PetscInt           npoints;
421 
422       PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, &pilocal, &piremote));
423       PetscCall(PetscCalloc1(root_offset, &donor_closure));
424       root_offset = 0;
425       for (PetscInt p = 0; p < nroots; p++) {
426         if (rootdata[2 * p] < 0) continue;
427         PetscInt  cl_size;
428         PetscInt *closure = NULL;
429         PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
430         for (PetscInt j = 1; j < cl_size; j++) {
431           PetscInt c = closure[2 * j];
432           if (pilocal) {
433             PetscInt found = -1;
434             if (npoints > 0) PetscCall(PetscFindInt(c, npoints, pilocal, &found));
435             if (found >= 0) {
436               donor_closure[root_offset++] = piremote[found];
437               continue;
438             }
439           }
440           // we own c
441           donor_closure[root_offset].rank  = rank;
442           donor_closure[root_offset].index = c;
443           root_offset++;
444         }
445         PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
446       }
447 
448       PetscCall(PetscMalloc1(leaf_offset, &leaf_donor_closure));
449       PetscCall(PetscSFBcastBegin(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE));
450       PetscCall(PetscSFBcastEnd(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE));
451       PetscCall(PetscSFDestroy(&sf_closure));
452       PetscCall(PetscFree(donor_closure));
453     }
454 
455     PetscSFNode *new_iremote;
456     PetscCall(PetscCalloc1(nroots, &new_iremote));
457     for (PetscInt i = 0; i < nroots; i++) new_iremote[i].rank = -1;
458     // Walk leaves and match vertices
459     leaf_offset = 0;
460     for (PetscInt i = 0; i < nleaves; i++) {
461       PetscInt  point   = filocal[i], cl_size;
462       PetscInt *closure = NULL;
463       PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
464       for (PetscInt j = 1; j < cl_size; j++) { // TODO: should we send donor edge orientations so we can flip for consistency?
465         PetscInt    c  = closure[2 * j];
466         PetscSFNode lc = leaf_donor_closure[leaf_offset];
467         // printf("[%d] face %d.%d: %d ?-- (%d,%d)\n", rank, point, j, c, lc.rank, lc.index);
468         if (new_iremote[c].rank == -1) {
469           new_iremote[c] = lc;
470         } else PetscCheck(new_iremote[c].rank == lc.rank && new_iremote[c].index == lc.index, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Mismatched cone ordering between faces");
471         leaf_offset++;
472       }
473       PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
474     }
475     PetscCall(PetscFree(leaf_donor_closure));
476 
477     // Include face points in closure SF
478     for (PetscInt i = 0; i < nleaves; i++) new_iremote[filocal[i]] = firemote[i];
479     // consolidate leaves
480     PetscInt num_new_leaves = 0;
481     for (PetscInt i = 0; i < nroots; i++) {
482       if (new_iremote[i].rank == -1) continue;
483       new_iremote[num_new_leaves] = new_iremote[i];
484       leafdata[num_new_leaves]    = i;
485       num_new_leaves++;
486     }
487     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, num_new_leaves, leafdata, PETSC_COPY_VALUES, &(*is_points)[f]));
488 
489     PetscSF csf;
490     PetscCall(PetscSFCreate(comm, &csf));
491     PetscCall(PetscSFSetGraph(csf, nroots, num_new_leaves, leafdata, PETSC_COPY_VALUES, new_iremote, PETSC_COPY_VALUES));
492     PetscCall(PetscFree(new_iremote)); // copy and delete because new_iremote is longer than it needs to be
493     PetscCall(PetscFree2(rootdata, leafdata));
494 
495     PetscInt npoints;
496     PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, NULL, NULL));
497     if (npoints < 0) { // empty point_sf
498       *closure_sf = csf;
499     } else {
500       PetscCall(PetscSFMerge(point_sf, csf, closure_sf));
501       PetscCall(PetscSFDestroy(&csf));
502     }
503     if (f > 0) PetscCall(PetscSFDestroy(&point_sf)); // Only destroy if point_sf is from previous calls to PetscSFMerge
504     point_sf = *closure_sf;                          // Use combined point + isoperiodic SF to define point ownership for further face_sf
505   }
506   PetscCall(PetscObjectSetName((PetscObject)*closure_sf, "Composed Periodic Points"));
507   PetscFunctionReturn(PETSC_SUCCESS);
508 }
509 
510 static PetscErrorCode DMGetIsoperiodicPointSF_Plex(DM dm, PetscSF *sf)
511 {
512   DM_Plex *plex = (DM_Plex *)dm->data;
513 
514   PetscFunctionBegin;
515   if (!plex->periodic.composed_sf) PetscCall(DMPlexCreateIsoperiodicPointSF_Private(dm, plex->periodic.num_face_sfs, plex->periodic.face_sfs, &plex->periodic.composed_sf, &plex->periodic.periodic_points));
516   if (sf) *sf = plex->periodic.composed_sf;
517   PetscFunctionReturn(PETSC_SUCCESS);
518 }
519 
520 PetscErrorCode DMPlexMigrateIsoperiodicFaceSF_Internal(DM old_dm, DM dm, PetscSF sf_migration)
521 {
522   DM_Plex    *plex = (DM_Plex *)old_dm->data;
523   PetscSF     sf_point, *new_face_sfs;
524   PetscMPIInt rank;
525 
526   PetscFunctionBegin;
527   if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
528   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
529   PetscCall(DMGetPointSF(dm, &sf_point));
530   PetscCall(PetscMalloc1(plex->periodic.num_face_sfs, &new_face_sfs));
531 
532   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) {
533     PetscInt           old_npoints, new_npoints, old_nleaf, new_nleaf, point_nleaf;
534     PetscSFNode       *new_leafdata, *rootdata, *leafdata;
535     const PetscInt    *old_local, *point_local;
536     const PetscSFNode *old_remote, *point_remote;
537 
538     PetscCall(PetscSFGetGraph(plex->periodic.face_sfs[f], &old_npoints, &old_nleaf, &old_local, &old_remote));
539     PetscCall(PetscSFGetGraph(sf_migration, NULL, &new_nleaf, NULL, NULL));
540     PetscCall(PetscSFGetGraph(sf_point, &new_npoints, &point_nleaf, &point_local, &point_remote));
541     PetscAssert(new_nleaf == new_npoints, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected migration leaf space to match new point root space");
542     PetscCall(PetscMalloc3(old_npoints, &rootdata, old_npoints, &leafdata, new_npoints, &new_leafdata));
543 
544     // Fill new_leafdata with new owners of all points
545     for (PetscInt i = 0; i < new_npoints; i++) {
546       new_leafdata[i].rank  = rank;
547       new_leafdata[i].index = i;
548     }
549     for (PetscInt i = 0; i < point_nleaf; i++) {
550       PetscInt j      = point_local[i];
551       new_leafdata[j] = point_remote[i];
552     }
553     // REPLACE is okay because every leaf agrees about the new owners
554     PetscCall(PetscSFReduceBegin(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE));
555     PetscCall(PetscSFReduceEnd(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE));
556     // rootdata now contains the new owners
557 
558     // Send to leaves of old space
559     for (PetscInt i = 0; i < old_npoints; i++) {
560       leafdata[i].rank  = -1;
561       leafdata[i].index = -1;
562     }
563     PetscCall(PetscSFBcastBegin(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE));
564     PetscCall(PetscSFBcastEnd(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE));
565 
566     // Send to new leaf space
567     PetscCall(PetscSFBcastBegin(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE));
568     PetscCall(PetscSFBcastEnd(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE));
569 
570     PetscInt     nface = 0, *new_local;
571     PetscSFNode *new_remote;
572     for (PetscInt i = 0; i < new_npoints; i++) nface += (new_leafdata[i].rank >= 0);
573     PetscCall(PetscMalloc1(nface, &new_local));
574     PetscCall(PetscMalloc1(nface, &new_remote));
575     nface = 0;
576     for (PetscInt i = 0; i < new_npoints; i++) {
577       if (new_leafdata[i].rank == -1) continue;
578       new_local[nface]  = i;
579       new_remote[nface] = new_leafdata[i];
580       nface++;
581     }
582     PetscCall(PetscFree3(rootdata, leafdata, new_leafdata));
583     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &new_face_sfs[f]));
584     PetscCall(PetscSFSetGraph(new_face_sfs[f], new_npoints, nface, new_local, PETSC_OWN_POINTER, new_remote, PETSC_OWN_POINTER));
585     {
586       char new_face_sf_name[PETSC_MAX_PATH_LEN];
587       PetscCall(PetscSNPrintf(new_face_sf_name, sizeof new_face_sf_name, "Migrated Isoperiodic Faces #%" PetscInt_FMT, f));
588       PetscCall(PetscObjectSetName((PetscObject)new_face_sfs[f], new_face_sf_name));
589     }
590   }
591 
592   PetscCall(DMPlexSetIsoperiodicFaceSF(dm, plex->periodic.num_face_sfs, new_face_sfs));
593   PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, plex->periodic.num_face_sfs, (PetscScalar *)plex->periodic.transform));
594   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) PetscCall(PetscSFDestroy(&new_face_sfs[f]));
595   PetscCall(PetscFree(new_face_sfs));
596   PetscFunctionReturn(PETSC_SUCCESS);
597 }
598 
599 PetscErrorCode DMPeriodicCoordinateSetUp_Internal(DM dm)
600 {
601   DM_Plex   *plex = (DM_Plex *)dm->data;
602   PetscCount count;
603   IS         isdof;
604   PetscInt   dim;
605 
606   PetscFunctionBegin;
607   if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
608   PetscCall(DMGetIsoperiodicPointSF_Plex(dm, NULL));
609   PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex));
610 
611   PetscCall(DMGetDimension(dm, &dim));
612   dm->periodic.num_affines = plex->periodic.num_face_sfs;
613   PetscCall(PetscMalloc2(dm->periodic.num_affines, &dm->periodic.affine_to_local, dm->periodic.num_affines, &dm->periodic.affine));
614 
615   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) {
616     PetscInt        npoints, vsize, isize;
617     const PetscInt *points;
618     IS              is = plex->periodic.periodic_points[f];
619     PetscSegBuffer  seg;
620     PetscSection    section;
621     PetscInt       *ind;
622     Vec             L, P;
623     VecType         vec_type;
624     VecScatter      scatter;
625     PetscScalar    *x;
626 
627     PetscCall(DMGetLocalSection(dm, &section));
628     PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 32, &seg));
629     PetscCall(ISGetSize(is, &npoints));
630     PetscCall(ISGetIndices(is, &points));
631     for (PetscInt i = 0; i < npoints; i++) {
632       PetscInt point = points[i], off, dof;
633 
634       PetscCall(PetscSectionGetOffset(section, point, &off));
635       PetscCall(PetscSectionGetDof(section, point, &dof));
636       PetscAssert(dof % dim == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected dof %" PetscInt_FMT " not divisible by dimension %" PetscInt_FMT, dof, dim);
637       for (PetscInt j = 0; j < dof / dim; j++) {
638         PetscInt *slot;
639 
640         PetscCall(PetscSegBufferGetInts(seg, 1, &slot));
641         *slot = off / dim + j;
642       }
643     }
644     PetscCall(PetscSegBufferGetSize(seg, &count));
645     PetscCall(PetscSegBufferExtractAlloc(seg, &ind));
646     PetscCall(PetscSegBufferDestroy(&seg));
647     PetscCall(PetscIntCast(count, &isize));
648     PetscCall(ISCreateBlock(PETSC_COMM_SELF, dim, isize, ind, PETSC_OWN_POINTER, &isdof));
649 
650     PetscCall(PetscIntCast(count * dim, &vsize));
651     PetscCall(DMGetLocalVector(dm, &L));
652     PetscCall(VecCreate(PETSC_COMM_SELF, &P));
653     PetscCall(VecSetSizes(P, vsize, vsize));
654     PetscCall(VecGetType(L, &vec_type));
655     PetscCall(VecSetType(P, vec_type));
656     PetscCall(VecScatterCreate(P, NULL, L, isdof, &scatter));
657     PetscCall(DMRestoreLocalVector(dm, &L));
658     PetscCall(ISDestroy(&isdof));
659 
660     PetscCall(VecGetArrayWrite(P, &x));
661     for (PetscCount i = 0; i < count; i++) {
662       for (PetscInt j = 0; j < dim; j++) x[i * dim + j] = plex->periodic.transform[f][j][3];
663     }
664     PetscCall(VecRestoreArrayWrite(P, &x));
665 
666     dm->periodic.affine_to_local[f] = scatter;
667     dm->periodic.affine[f]          = P;
668   }
669   PetscCall(DMGlobalToLocalHookAdd(dm, NULL, DMCoordAddPeriodicOffsets_Private, NULL));
670   PetscFunctionReturn(PETSC_SUCCESS);
671 }
672 
673 // We'll just orient all the edges, though only periodic boundary edges need orientation
674 static PetscErrorCode DMPlexOrientPositiveEdges_Private(DM dm)
675 {
676   PetscInt dim, eStart, eEnd;
677 
678   PetscFunctionBegin;
679   PetscCall(DMGetDimension(dm, &dim));
680   if (dim < 3) PetscFunctionReturn(PETSC_SUCCESS); // not necessary
681   PetscCall(DMPlexGetDepthStratum(dm, 1, &eStart, &eEnd));
682   for (PetscInt e = eStart; e < eEnd; e++) {
683     const PetscInt *cone;
684     PetscCall(DMPlexGetCone(dm, e, &cone));
685     if (cone[0] > cone[1]) PetscCall(DMPlexOrientPoint(dm, e, -1));
686   }
687   PetscFunctionReturn(PETSC_SUCCESS);
688 }
689 
690 PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Internal(DM dm, PetscInt dim, const PetscInt faces[], const PetscReal lower[], const PetscReal upper[], const DMBoundaryType periodicity[], PetscBool interpolate)
691 {
692   PetscInt  eextent[3] = {1, 1, 1}, vextent[3] = {1, 1, 1};
693   const Ijk closure_1[] = {
694     {0, 0, 0},
695     {1, 0, 0},
696   };
697   const Ijk closure_2[] = {
698     {0, 0, 0},
699     {1, 0, 0},
700     {1, 1, 0},
701     {0, 1, 0},
702   };
703   const Ijk closure_3[] = {
704     {0, 0, 0},
705     {0, 1, 0},
706     {1, 1, 0},
707     {1, 0, 0},
708     {0, 0, 1},
709     {1, 0, 1},
710     {1, 1, 1},
711     {0, 1, 1},
712   };
713   const Ijk *const closure_dim[] = {NULL, closure_1, closure_2, closure_3};
714   // This must be kept consistent with DMPlexCreateCubeMesh_Internal
715   const PetscInt        face_marker_1[]   = {1, 2};
716   const PetscInt        face_marker_2[]   = {4, 2, 1, 3};
717   const PetscInt        face_marker_3[]   = {6, 5, 3, 4, 1, 2};
718   const PetscInt *const face_marker_dim[] = {NULL, face_marker_1, face_marker_2, face_marker_3};
719   // Orient faces so the normal is in the positive axis and the first vertex is the one closest to zero.
720   // These orientations can be determined by examining cones of a reference quad and hex element.
721   const PetscInt        face_orient_1[]   = {0, 0};
722   const PetscInt        face_orient_2[]   = {-1, 0, 0, -1};
723   const PetscInt        face_orient_3[]   = {-2, 0, -2, 1, -2, 0};
724   const PetscInt *const face_orient_dim[] = {NULL, face_orient_1, face_orient_2, face_orient_3};
725 
726   PetscFunctionBegin;
727   PetscCall(PetscLogEventBegin(DMPLEX_CreateBoxSFC, dm, 0, 0, 0));
728   PetscAssertPointer(dm, 1);
729   PetscValidLogicalCollectiveInt(dm, dim, 2);
730   PetscCall(DMSetDimension(dm, dim));
731   PetscMPIInt rank, size;
732   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size));
733   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
734   for (PetscInt i = 0; i < dim; i++) {
735     eextent[i] = faces[i];
736     vextent[i] = faces[i] + 1;
737   }
738   ZLayout layout;
739   PetscCall(ZLayoutCreate(size, eextent, vextent, &layout));
740   PetscZSet vset; // set of all vertices in the closure of the owned elements
741   PetscCall(PetscZSetCreate(&vset));
742   PetscInt local_elems = 0;
743   for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
744     Ijk loc = ZCodeSplit(z);
745     if (IjkActive(layout.vextent, loc)) PetscCall(PetscZSetAdd(vset, z));
746     else {
747       z += ZStepOct(z);
748       continue;
749     }
750     if (IjkActive(layout.eextent, loc)) {
751       local_elems++;
752       // Add all neighboring vertices to set
753       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
754         Ijk   inc  = closure_dim[dim][n];
755         Ijk   nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
756         ZCode v    = ZEncode(nloc);
757         PetscCall(PetscZSetAdd(vset, v));
758       }
759     }
760   }
761   PetscInt local_verts, off = 0;
762   ZCode   *vert_z;
763   PetscCall(PetscZSetGetSize(vset, &local_verts));
764   PetscCall(PetscMalloc1(local_verts, &vert_z));
765   PetscCall(PetscZSetGetElems(vset, &off, vert_z));
766   PetscCall(PetscZSetDestroy(&vset));
767   // ZCode is unsigned for bitwise convenience, but highest bit should never be set, so can interpret as signed
768   PetscCall(PetscSortInt64(local_verts, (PetscInt64 *)vert_z));
769 
770   PetscCall(DMPlexSetChart(dm, 0, local_elems + local_verts));
771   for (PetscInt e = 0; e < local_elems; e++) PetscCall(DMPlexSetConeSize(dm, e, PetscPowInt(2, dim)));
772   PetscCall(DMSetUp(dm));
773   {
774     PetscInt e = 0;
775     for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
776       Ijk loc = ZCodeSplit(z);
777       if (!IjkActive(layout.eextent, loc)) {
778         z += ZStepOct(z);
779         continue;
780       }
781       PetscInt cone[8], orient[8] = {0};
782       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
783         Ijk      inc  = closure_dim[dim][n];
784         Ijk      nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
785         ZCode    v    = ZEncode(nloc);
786         PetscInt ci   = ZCodeFind(v, local_verts, vert_z);
787         PetscAssert(ci >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Did not find neighbor vertex in set");
788         cone[n] = local_elems + ci;
789       }
790       PetscCall(DMPlexSetCone(dm, e, cone));
791       PetscCall(DMPlexSetConeOrientation(dm, e, orient));
792       e++;
793     }
794   }
795 
796   PetscCall(DMPlexSymmetrize(dm));
797   PetscCall(DMPlexStratify(dm));
798 
799   { // Create point SF
800     PetscSF sf;
801     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &sf));
802     PetscInt owned_verts = ZCodeFind(layout.zstarts[rank + 1], local_verts, vert_z);
803     if (owned_verts < 0) owned_verts = -(owned_verts + 1); // We don't care whether the key was found
804     PetscInt     num_ghosts = local_verts - owned_verts;   // Due to sorting, owned vertices always come first
805     PetscInt    *local_ghosts;
806     PetscSFNode *ghosts;
807     PetscCall(PetscMalloc1(num_ghosts, &local_ghosts));
808     PetscCall(PetscMalloc1(num_ghosts, &ghosts));
809     for (PetscInt i = 0; i < num_ghosts;) {
810       ZCode       z           = vert_z[owned_verts + i];
811       PetscMPIInt remote_rank = (PetscMPIInt)ZCodeFind(z, size + 1, layout.zstarts), remote_count = 0;
812       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
813       // We have a new remote rank; find all the ghost indices (which are contiguous in vert_z)
814 
815       // Count the elements on remote_rank
816       PetscInt remote_elem = ZLayoutElementsOnRank(&layout, remote_rank);
817 
818       // Traverse vertices and make ghost links
819       for (ZCode rz = layout.zstarts[remote_rank]; rz < layout.zstarts[remote_rank + 1]; rz++) {
820         Ijk loc = ZCodeSplit(rz);
821         if (rz == z) {
822           local_ghosts[i] = local_elems + owned_verts + i;
823           ghosts[i].rank  = remote_rank;
824           ghosts[i].index = remote_elem + remote_count;
825           i++;
826           if (i == num_ghosts) break;
827           z = vert_z[owned_verts + i];
828         }
829         if (IjkActive(layout.vextent, loc)) remote_count++;
830         else rz += ZStepOct(rz);
831       }
832     }
833     PetscCall(PetscSFSetGraph(sf, local_elems + local_verts, num_ghosts, local_ghosts, PETSC_OWN_POINTER, ghosts, PETSC_OWN_POINTER));
834     PetscCall(PetscObjectSetName((PetscObject)sf, "SFC Point SF"));
835     PetscCall(DMSetPointSF(dm, sf));
836     PetscCall(PetscSFDestroy(&sf));
837   }
838   {
839     Vec          coordinates;
840     PetscScalar *coords;
841     PetscSection coord_section;
842     PetscInt     coord_size;
843     PetscCall(DMGetCoordinateSection(dm, &coord_section));
844     PetscCall(PetscSectionSetNumFields(coord_section, 1));
845     PetscCall(PetscSectionSetFieldComponents(coord_section, 0, dim));
846     PetscCall(PetscSectionSetChart(coord_section, local_elems, local_elems + local_verts));
847     for (PetscInt v = 0; v < local_verts; v++) {
848       PetscInt point = local_elems + v;
849       PetscCall(PetscSectionSetDof(coord_section, point, dim));
850       PetscCall(PetscSectionSetFieldDof(coord_section, point, 0, dim));
851     }
852     PetscCall(PetscSectionSetUp(coord_section));
853     PetscCall(PetscSectionGetStorageSize(coord_section, &coord_size));
854     PetscCall(VecCreate(PETSC_COMM_SELF, &coordinates));
855     PetscCall(PetscObjectSetName((PetscObject)coordinates, "coordinates"));
856     PetscCall(VecSetSizes(coordinates, coord_size, PETSC_DETERMINE));
857     PetscCall(VecSetBlockSize(coordinates, dim));
858     PetscCall(VecSetType(coordinates, VECSTANDARD));
859     PetscCall(VecGetArray(coordinates, &coords));
860     for (PetscInt v = 0; v < local_verts; v++) {
861       Ijk loc             = ZCodeSplit(vert_z[v]);
862       coords[v * dim + 0] = lower[0] + loc.i * (upper[0] - lower[0]) / layout.eextent.i;
863       if (dim > 1) coords[v * dim + 1] = lower[1] + loc.j * (upper[1] - lower[1]) / layout.eextent.j;
864       if (dim > 2) coords[v * dim + 2] = lower[2] + loc.k * (upper[2] - lower[2]) / layout.eextent.k;
865     }
866     PetscCall(VecRestoreArray(coordinates, &coords));
867     PetscCall(DMSetCoordinatesLocal(dm, coordinates));
868     PetscCall(VecDestroy(&coordinates));
869   }
870   if (interpolate) {
871     PetscCall(DMPlexInterpolateInPlace_Internal(dm));
872     // It's currently necessary to orient the donor and periodic edges consistently. An easy way to ensure that is ot
873     // give all edges positive orientation. Since vertices are created in Z-order, all ranks will agree about the
874     // ordering cone[0] < cone[1]. This is overkill and it would be nice to remove this preparation and make
875     // DMPlexCreateIsoperiodicClosureSF_Private() more resilient, so it fixes any inconsistent orientations. That might
876     // be needed in a general CGNS reader, for example.
877     PetscCall(DMPlexOrientPositiveEdges_Private(dm));
878 
879     DMLabel label;
880     PetscCall(DMCreateLabel(dm, "Face Sets"));
881     PetscCall(DMGetLabel(dm, "Face Sets", &label));
882     PetscSegBuffer per_faces[3], donor_face_closure[3], my_donor_faces[3];
883     for (PetscInt i = 0; i < 3; i++) {
884       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &per_faces[i]));
885       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &my_donor_faces[i]));
886       PetscCall(PetscSegBufferCreate(sizeof(ZCode), 64 * PetscPowInt(2, dim), &donor_face_closure[i]));
887     }
888     PetscInt fStart, fEnd, vStart, vEnd;
889     PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
890     PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
891     for (PetscInt f = fStart; f < fEnd; f++) {
892       PetscInt npoints, *points = NULL, num_fverts = 0, fverts[8];
893       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
894       PetscInt bc_count[6] = {0};
895       for (PetscInt i = 0; i < npoints; i++) {
896         PetscInt p = points[2 * i];
897         if (p < vStart || vEnd <= p) continue;
898         fverts[num_fverts++] = p;
899         Ijk loc              = ZCodeSplit(vert_z[p - vStart]);
900         // Convention here matches DMPlexCreateCubeMesh_Internal
901         bc_count[0] += loc.i == 0;
902         bc_count[1] += loc.i == layout.vextent.i - 1;
903         bc_count[2] += loc.j == 0;
904         bc_count[3] += loc.j == layout.vextent.j - 1;
905         bc_count[4] += loc.k == 0;
906         bc_count[5] += loc.k == layout.vextent.k - 1;
907       }
908       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
909       for (PetscInt bc = 0, bc_match = 0; bc < 2 * dim; bc++) {
910         if (bc_count[bc] == PetscPowInt(2, dim - 1)) {
911           PetscCall(DMPlexOrientPoint(dm, f, face_orient_dim[dim][bc]));
912           if (periodicity[bc / 2] == DM_BOUNDARY_PERIODIC) {
913             PetscInt *put;
914             if (bc % 2 == 0) { // donor face; no label
915               PetscCall(PetscSegBufferGet(my_donor_faces[bc / 2], 1, &put));
916               *put = f;
917             } else { // periodic face
918               PetscCall(PetscSegBufferGet(per_faces[bc / 2], 1, &put));
919               *put = f;
920               ZCode *zput;
921               PetscCall(PetscSegBufferGet(donor_face_closure[bc / 2], num_fverts, &zput));
922               for (PetscInt i = 0; i < num_fverts; i++) {
923                 Ijk loc = ZCodeSplit(vert_z[fverts[i] - vStart]);
924                 switch (bc / 2) {
925                 case 0:
926                   loc.i = 0;
927                   break;
928                 case 1:
929                   loc.j = 0;
930                   break;
931                 case 2:
932                   loc.k = 0;
933                   break;
934                 }
935                 *zput++ = ZEncode(loc);
936               }
937             }
938             continue;
939           }
940           PetscAssert(bc_match == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Face matches multiple face sets");
941           PetscCall(DMLabelSetValue(label, f, face_marker_dim[dim][bc]));
942           bc_match++;
943         }
944       }
945     }
946     // Ensure that the Coordinate DM has our new boundary labels
947     DM cdm;
948     PetscCall(DMGetCoordinateDM(dm, &cdm));
949     PetscCall(DMCopyLabels(dm, cdm, PETSC_COPY_VALUES, PETSC_FALSE, DM_COPY_LABELS_FAIL));
950     if (periodicity[0] == DM_BOUNDARY_PERIODIC || (dim > 1 && periodicity[1] == DM_BOUNDARY_PERIODIC) || (dim > 2 && periodicity[2] == DM_BOUNDARY_PERIODIC)) {
951       PetscCall(DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(dm, &layout, vert_z, per_faces, lower, upper, periodicity, donor_face_closure, my_donor_faces));
952     }
953     for (PetscInt i = 0; i < 3; i++) {
954       PetscCall(PetscSegBufferDestroy(&per_faces[i]));
955       PetscCall(PetscSegBufferDestroy(&donor_face_closure[i]));
956       PetscCall(PetscSegBufferDestroy(&my_donor_faces[i]));
957     }
958   }
959   PetscCall(PetscFree(layout.zstarts));
960   PetscCall(PetscFree(vert_z));
961   PetscCall(PetscLogEventEnd(DMPLEX_CreateBoxSFC, dm, 0, 0, 0));
962   PetscFunctionReturn(PETSC_SUCCESS);
963 }
964 
965 /*@
966   DMPlexSetIsoperiodicFaceSF - Express periodicity from an existing mesh
967 
968   Logically Collective
969 
970   Input Parameters:
971 + dm           - The `DMPLEX` on which to set periodicity
972 . num_face_sfs - Number of `PetscSF`s in `face_sfs`
973 - face_sfs     - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.
974 
975   Level: advanced
976 
977   Note:
978   One can use `-dm_plex_shape zbox` to use this mode of periodicity, wherein the periodic points are distinct both globally
979   and locally, but are paired when creating a global dof space.
980 
981 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexGetIsoperiodicFaceSF()`
982 @*/
983 PetscErrorCode DMPlexSetIsoperiodicFaceSF(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs)
984 {
985   DM_Plex *plex = (DM_Plex *)dm->data;
986 
987   PetscFunctionBegin;
988   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
989   if (face_sfs) PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex));
990   if (face_sfs == plex->periodic.face_sfs && num_face_sfs == plex->periodic.num_face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
991 
992   for (PetscInt i = 0; i < num_face_sfs; i++) PetscCall(PetscObjectReference((PetscObject)face_sfs[i]));
993 
994   if (plex->periodic.num_face_sfs > 0) {
995     for (PetscInt i = 0; i < plex->periodic.num_face_sfs; i++) PetscCall(PetscSFDestroy(&plex->periodic.face_sfs[i]));
996     PetscCall(PetscFree(plex->periodic.face_sfs));
997   }
998 
999   plex->periodic.num_face_sfs = num_face_sfs;
1000   PetscCall(PetscCalloc1(num_face_sfs, &plex->periodic.face_sfs));
1001   for (PetscInt i = 0; i < num_face_sfs; i++) plex->periodic.face_sfs[i] = face_sfs[i];
1002 
1003   DM cdm = dm->coordinates[0].dm; // Can't DMGetCoordinateDM because it automatically creates one
1004   if (cdm) {
1005     PetscCall(DMPlexSetIsoperiodicFaceSF(cdm, num_face_sfs, face_sfs));
1006     if (face_sfs) cdm->periodic.setup = DMPeriodicCoordinateSetUp_Internal;
1007   }
1008   PetscFunctionReturn(PETSC_SUCCESS);
1009 }
1010 
1011 /*@C
1012   DMPlexGetIsoperiodicFaceSF - Obtain periodicity for a mesh
1013 
1014   Logically Collective
1015 
1016   Input Parameter:
1017 . dm - The `DMPLEX` for which to obtain periodic relation
1018 
1019   Output Parameters:
1020 + num_face_sfs - Number of `PetscSF`s in the array
1021 - face_sfs     - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.
1022 
1023   Level: advanced
1024 
1025 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()`
1026 @*/
1027 PetscErrorCode DMPlexGetIsoperiodicFaceSF(DM dm, PetscInt *num_face_sfs, const PetscSF **face_sfs)
1028 {
1029   DM_Plex *plex = (DM_Plex *)dm->data;
1030 
1031   PetscFunctionBegin;
1032   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1033   *face_sfs     = plex->periodic.face_sfs;
1034   *num_face_sfs = plex->periodic.num_face_sfs;
1035   PetscFunctionReturn(PETSC_SUCCESS);
1036 }
1037 
1038 /*@C
1039   DMPlexSetIsoperiodicFaceTransform - set geometric transform from donor to periodic points
1040 
1041   Logically Collective
1042 
1043   Input Parameters:
1044 + dm - `DMPLEX` that has been configured with `DMPlexSetIsoperiodicFaceSF()`
1045 . n  - Number of transforms in array
1046 - t  - Array of 4x4 affine transformation basis.
1047 
1048   Level: advanced
1049 
1050   Notes:
1051   Affine transforms are 4x4 matrices in which the leading 3x3 block expresses a rotation (or identity for no rotation),
1052   the last column contains a translation vector, and the bottom row is all zero except the last entry, which must always
1053   be 1. This representation is common in geometric modeling and allows affine transformations to be composed using
1054   simple matrix multiplication.
1055 
1056   Although the interface accepts a general affine transform, only affine translation is supported at present.
1057 
1058   Developer Notes:
1059   This interface should be replaced by making BasisTransform public, expanding it to support affine representations, and
1060   adding GPU implementations to apply the G2L/L2G transforms.
1061 
1062 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()`
1063 @*/
1064 PetscErrorCode DMPlexSetIsoperiodicFaceTransform(DM dm, PetscInt n, const PetscScalar t[])
1065 {
1066   DM_Plex *plex = (DM_Plex *)dm->data;
1067 
1068   PetscFunctionBegin;
1069   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1070   PetscCheck(n == plex->periodic.num_face_sfs, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Number of transforms (%" PetscInt_FMT ") must equal number of isoperiodc face SFs (%" PetscInt_FMT ")", n, plex->periodic.num_face_sfs);
1071 
1072   PetscCall(PetscMalloc1(n, &plex->periodic.transform));
1073   for (PetscInt i = 0; i < n; i++) {
1074     for (PetscInt j = 0; j < 4; j++) {
1075       for (PetscInt k = 0; k < 4; k++) {
1076         PetscCheck(j != k || t[i * 16 + j * 4 + k] == 1., PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Rotated transforms not supported");
1077         plex->periodic.transform[i][j][k] = t[i * 16 + j * 4 + k];
1078       }
1079     }
1080   }
1081   PetscFunctionReturn(PETSC_SUCCESS);
1082 }
1083