xref: /petsc/src/dm/impls/plex/plexsfc.c (revision 1fca310d5bbec1c360cd9da8976f4e0178ab9137)
1 #include <petsc/private/dmpleximpl.h> /*I   "petscdmplex.h"   I*/
2 #include <petscsf.h>
3 #include <petsc/private/hashset.h>
4 
5 typedef uint64_t ZCode;
6 
7 PETSC_HASH_SET(ZSet, ZCode, PetscHash_UInt64, PetscHashEqual)
8 
9 typedef struct {
10   PetscInt i, j, k;
11 } Ijk;
12 
13 typedef struct {
14   Ijk         eextent;
15   Ijk         vextent;
16   PetscMPIInt comm_size;
17   ZCode      *zstarts;
18 } ZLayout;
19 
20 static unsigned ZCodeSplit1(ZCode z)
21 {
22   z = ((z & 01001001001001001) | ((z >> 2) & 02002002002002002) | ((z >> 4) & 04004004004004004));
23   z = (z | (z >> 6) | (z >> 12)) & 0000000777000000777;
24   z = (z | (z >> 18)) & 0777777;
25   return (unsigned)z;
26 }
27 
28 static ZCode ZEncode1(unsigned t)
29 {
30   ZCode z = t;
31   z       = (z | (z << 18)) & 0777000000777;
32   z       = (z | (z << 6) | (z << 12)) & 07007007007007007;
33   z       = (z | (z << 2) | (z << 4)) & 0111111111111111111;
34   return z;
35 }
36 
37 static Ijk ZCodeSplit(ZCode z)
38 {
39   Ijk c;
40   c.i = ZCodeSplit1(z >> 2);
41   c.j = ZCodeSplit1(z >> 1);
42   c.k = ZCodeSplit1(z >> 0);
43   return c;
44 }
45 
46 static ZCode ZEncode(Ijk c)
47 {
48   ZCode z = (ZEncode1(c.i) << 2) | (ZEncode1(c.j) << 1) | ZEncode1(c.k);
49   return z;
50 }
51 
52 static PetscBool IjkActive(Ijk extent, Ijk l)
53 {
54   if (l.i < extent.i && l.j < extent.j && l.k < extent.k) return PETSC_TRUE;
55   return PETSC_FALSE;
56 }
57 
58 // If z is not the base of an octet (last 3 bits 0), return 0.
59 //
60 // If z is the base of an octet, we recursively grow to the biggest structured octet. This is typically useful when a z
61 // is outside the domain and we wish to skip a (possibly recursively large) octet to find our next interesting point.
62 static ZCode ZStepOct(ZCode z)
63 {
64   if (PetscUnlikely(z == 0)) return 0; // Infinite loop below if z == 0
65   ZCode step = 07;
66   for (; (z & step) == 0; step = (step << 3) | 07) { }
67   return step >> 3;
68 }
69 
70 // Since element/vertex box extents are typically not equal powers of 2, Z codes that lie within the domain are not contiguous.
71 static PetscErrorCode ZLayoutCreate(PetscMPIInt size, const PetscInt eextent[3], const PetscInt vextent[3], ZLayout *layout)
72 {
73   PetscFunctionBegin;
74   layout->eextent.i = eextent[0];
75   layout->eextent.j = eextent[1];
76   layout->eextent.k = eextent[2];
77   layout->vextent.i = vextent[0];
78   layout->vextent.j = vextent[1];
79   layout->vextent.k = vextent[2];
80   layout->comm_size = size;
81   layout->zstarts   = NULL;
82   PetscCall(PetscMalloc1(size + 1, &layout->zstarts));
83 
84   PetscInt total_elems = eextent[0] * eextent[1] * eextent[2];
85   ZCode    z           = 0;
86   layout->zstarts[0]   = 0;
87   // This loop traverses all vertices in the global domain, so is worth making fast. We use ZStepBound
88   for (PetscMPIInt r = 0; r < size; r++) {
89     PetscInt elems_needed = (total_elems / size) + (total_elems % size > r), count;
90     for (count = 0; count < elems_needed; z++) {
91       ZCode skip = ZStepOct(z); // optimistically attempt a longer step
92       for (ZCode s = skip;; s >>= 3) {
93         Ijk trial = ZCodeSplit(z + s);
94         if (IjkActive(layout->eextent, trial)) {
95           while (count + s + 1 > (ZCode)elems_needed) s >>= 3; // Shrink the octet
96           count += s + 1;
97           z += s;
98           break;
99         }
100         if (s == 0) { // the whole skip octet is inactive
101           z += skip;
102           break;
103         }
104       }
105     }
106     // Pick up any extra vertices in the Z ordering before the next rank's first owned element.
107     //
108     // This leads to poorly balanced vertices when eextent is a power of 2, since all the fringe vertices end up
109     // on the last rank. A possible solution is to balance the Z-order vertices independently from the cells, which will
110     // result in a lot of element closures being remote. We could finish marking boundary conditions, then do a round of
111     // vertex ownership smoothing (which would reorder and redistribute vertices without touching element distribution).
112     // Another would be to have an analytic ownership criteria for vertices in the fringe veextent - eextent. This would
113     // complicate the job of identifying an owner and its offset.
114     //
115     // The current recommended approach is to let `-dm_distribute 1` (default) resolve vertex ownership. This is
116     // *mandatory* with isoperiodicity (except in special cases) to remove standed vertices from local spaces. Here's
117     // the issue:
118     //
119     // Consider this partition on rank 0 (left) and rank 1.
120     //
121     //    4 --------  5 -- 14 --10 -- 21 --11
122     //                |          |          |
123     // 7 -- 16 --  8  |          |          |
124     // |           |  3 -------  7 -------  9
125     // |           |             |          |
126     // 4 --------  6 ------ 10   |          |
127     // |           |         |   6 -- 16 -- 8
128     // |           |         |
129     // 3 ---11---  5 --18--  9
130     //
131     // The periodic face SF looks like
132     // [0] Number of roots=21, leaves=1, remote ranks=1
133     // [0] 16 <- (0,11)
134     // [1] Number of roots=22, leaves=2, remote ranks=2
135     // [1] 14 <- (0,18)
136     // [1] 21 <- (1,16)
137     //
138     // In handling face (0,16), rank 0 learns that (0,7) and (0,8) map to (0,3) and (0,5) respectively, thus we won't use
139     // the point SF links to (1,4) and (1,5). Rank 1 learns about the periodic mapping of (1,5) while handling face
140     // (1,14), but never learns that vertex (1,4) has been mapped to (0,3) by face (0,16).
141     //
142     // We can relatively easily inform vertex (1,4) of this mapping, but it stays in rank 1's local space despite not
143     // being in the closure and thus not being contributed to. This would be mostly harmless except that some viewer
144     // routines expect all local points to be somehow significant. It is not easy to analytically remove the (1,4)
145     // vertex because the point SF and isoperiodic face SF would need to be updated to account for removal of the
146     // stranded vertices.
147     for (; z <= ZEncode(layout->vextent); z++) {
148       Ijk loc = ZCodeSplit(z);
149       if (IjkActive(layout->eextent, loc)) break;
150       z += ZStepOct(z);
151     }
152     layout->zstarts[r + 1] = z;
153   }
154   layout->zstarts[size] = ZEncode(layout->vextent);
155   PetscFunctionReturn(PETSC_SUCCESS);
156 }
157 
158 static PetscInt ZLayoutElementsOnRank(const ZLayout *layout, PetscMPIInt rank)
159 {
160   PetscInt remote_elem = 0;
161   for (ZCode rz = layout->zstarts[rank]; rz < layout->zstarts[rank + 1]; rz++) {
162     Ijk loc = ZCodeSplit(rz);
163     if (IjkActive(layout->eextent, loc)) remote_elem++;
164     else rz += ZStepOct(rz);
165   }
166   return remote_elem;
167 }
168 
169 static PetscInt ZCodeFind(ZCode key, PetscInt n, const ZCode X[])
170 {
171   PetscInt lo = 0, hi = n;
172 
173   if (n == 0) return -1;
174   while (hi - lo > 1) {
175     PetscInt mid = lo + (hi - lo) / 2;
176     if (key < X[mid]) hi = mid;
177     else lo = mid;
178   }
179   return key == X[lo] ? lo : -(lo + (key > X[lo]) + 1);
180 }
181 
182 static PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(DM dm, const ZLayout *layout, const ZCode *vert_z, PetscSegBuffer per_faces[3], const PetscReal *lower, const PetscReal *upper, const DMBoundaryType *periodicity, PetscSegBuffer donor_face_closure[3], PetscSegBuffer my_donor_faces[3])
183 {
184   MPI_Comm    comm;
185   PetscInt    dim, vStart, vEnd;
186   PetscMPIInt size;
187   PetscSF     face_sfs[3];
188   PetscScalar transforms[3][4][4] = {{{0}}};
189 
190   PetscFunctionBegin;
191   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
192   PetscCallMPI(MPI_Comm_size(comm, &size));
193   PetscCall(DMGetDimension(dm, &dim));
194   const PetscInt csize = PetscPowInt(2, dim - 1);
195   PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
196 
197   PetscInt num_directions = 0;
198   for (PetscInt direction = 0; direction < dim; direction++) {
199     size_t       num_faces;
200     PetscInt    *faces;
201     ZCode       *donor_verts, *donor_minz;
202     PetscSFNode *leaf;
203 
204     if (periodicity[direction] != DM_BOUNDARY_PERIODIC) continue;
205     PetscCall(PetscSegBufferGetSize(per_faces[direction], &num_faces));
206     PetscCall(PetscSegBufferExtractInPlace(per_faces[direction], &faces));
207     PetscCall(PetscSegBufferExtractInPlace(donor_face_closure[direction], &donor_verts));
208     PetscCall(PetscMalloc1(num_faces, &donor_minz));
209     PetscCall(PetscMalloc1(num_faces, &leaf));
210     for (PetscInt i = 0; i < (PetscInt)num_faces; i++) {
211       ZCode minz = donor_verts[i * csize];
212       for (PetscInt j = 1; j < csize; j++) minz = PetscMin(minz, donor_verts[i * csize + j]);
213       donor_minz[i] = minz;
214     }
215     {
216       PetscBool sorted;
217       PetscCall(PetscSortedInt64(num_faces, (const PetscInt64 *)donor_minz, &sorted));
218       // If a donor vertex were chosen to broker multiple faces, we would have a logic error.
219       // Checking for sorting is a cheap check that there are no duplicates.
220       PetscCheck(sorted, PETSC_COMM_SELF, PETSC_ERR_PLIB, "minz not sorted; possible duplicates not checked");
221     }
222     for (PetscInt i = 0; i < (PetscInt)num_faces;) {
223       ZCode    z           = donor_minz[i];
224       PetscInt remote_rank = ZCodeFind(z, size + 1, layout->zstarts), remote_count = 0;
225       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
226       // Process all the vertices on this rank
227       for (ZCode rz = layout->zstarts[remote_rank]; rz < layout->zstarts[remote_rank + 1]; rz++) {
228         Ijk loc = ZCodeSplit(rz);
229         if (rz == z) {
230           leaf[i].rank  = remote_rank;
231           leaf[i].index = remote_count;
232           i++;
233           if (i == (PetscInt)num_faces) break;
234           z = donor_minz[i];
235         }
236         if (IjkActive(layout->vextent, loc)) remote_count++;
237       }
238     }
239     PetscCall(PetscFree(donor_minz));
240     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &face_sfs[num_directions]));
241     PetscCall(PetscSFSetGraph(face_sfs[num_directions], vEnd - vStart, num_faces, NULL, PETSC_USE_POINTER, leaf, PETSC_USE_POINTER));
242     const PetscInt *my_donor_degree;
243     PetscCall(PetscSFComputeDegreeBegin(face_sfs[num_directions], &my_donor_degree));
244     PetscCall(PetscSFComputeDegreeEnd(face_sfs[num_directions], &my_donor_degree));
245     PetscInt num_multiroots = 0;
246     for (PetscInt i = 0; i < vEnd - vStart; i++) {
247       num_multiroots += my_donor_degree[i];
248       if (my_donor_degree[i] == 0) continue;
249       PetscAssert(my_donor_degree[i] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex has multiple faces");
250     }
251     PetscInt *my_donors, *donor_indices, *my_donor_indices;
252     size_t    num_my_donors;
253     PetscCall(PetscSegBufferGetSize(my_donor_faces[direction], &num_my_donors));
254     PetscCheck((PetscInt)num_my_donors == num_multiroots, PETSC_COMM_SELF, PETSC_ERR_SUP, "Donor request does not match expected donors");
255     PetscCall(PetscSegBufferExtractInPlace(my_donor_faces[direction], &my_donors));
256     PetscCall(PetscMalloc1(vEnd - vStart, &my_donor_indices));
257     for (PetscInt i = 0; i < (PetscInt)num_my_donors; i++) {
258       PetscInt f = my_donors[i];
259       PetscInt num_points, *points = NULL, minv = PETSC_MAX_INT;
260       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
261       for (PetscInt j = 0; j < num_points; j++) {
262         PetscInt p = points[2 * j];
263         if (p < vStart || vEnd <= p) continue;
264         minv = PetscMin(minv, p);
265       }
266       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
267       PetscAssert(my_donor_degree[minv - vStart] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex not requested");
268       my_donor_indices[minv - vStart] = f;
269     }
270     PetscCall(PetscMalloc1(num_faces, &donor_indices));
271     PetscCall(PetscSFBcastBegin(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
272     PetscCall(PetscSFBcastEnd(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
273     PetscCall(PetscFree(my_donor_indices));
274     // Modify our leafs so they point to donor faces instead of donor minz. Additionally, give them indices as faces.
275     for (PetscInt i = 0; i < (PetscInt)num_faces; i++) leaf[i].index = donor_indices[i];
276     PetscCall(PetscFree(donor_indices));
277     PetscInt pStart, pEnd;
278     PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
279     PetscCall(PetscSFSetGraph(face_sfs[num_directions], pEnd - pStart, num_faces, faces, PETSC_COPY_VALUES, leaf, PETSC_OWN_POINTER));
280     {
281       char face_sf_name[PETSC_MAX_PATH_LEN];
282       PetscCall(PetscSNPrintf(face_sf_name, sizeof face_sf_name, "Z-order Isoperiodic Faces #%" PetscInt_FMT, num_directions));
283       PetscCall(PetscObjectSetName((PetscObject)face_sfs[num_directions], face_sf_name));
284     }
285 
286     transforms[num_directions][0][0]         = 1;
287     transforms[num_directions][1][1]         = 1;
288     transforms[num_directions][2][2]         = 1;
289     transforms[num_directions][3][3]         = 1;
290     transforms[num_directions][direction][3] = upper[direction] - lower[direction];
291     num_directions++;
292   }
293 
294   PetscCall(DMPlexSetIsoperiodicFaceSF(dm, num_directions, face_sfs));
295   PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, num_directions, (PetscScalar *)transforms));
296 
297   for (PetscInt i = 0; i < num_directions; i++) PetscCall(PetscSFDestroy(&face_sfs[i]));
298   PetscFunctionReturn(PETSC_SUCCESS);
299 }
300 
301 // This is a DMGlobalToLocalHook that applies the affine offsets. When extended for rotated periodicity, it'll need to
302 // apply a rotatonal transform and similar operations will be needed for fields (e.g., to rotate a velocity vector).
303 // We use this crude approach here so we don't have to write new GPU kernels yet.
304 static PetscErrorCode DMCoordAddPeriodicOffsets_Private(DM dm, Vec g, InsertMode mode, Vec l, void *ctx)
305 {
306   PetscFunctionBegin;
307   PetscCall(VecScatterBegin(dm->periodic.affine_to_local, dm->periodic.affine, l, ADD_VALUES, SCATTER_FORWARD));
308   PetscCall(VecScatterEnd(dm->periodic.affine_to_local, dm->periodic.affine, l, ADD_VALUES, SCATTER_FORWARD));
309   PetscFunctionReturn(PETSC_SUCCESS);
310 }
311 
312 // Start with an SF for a positive depth (e.g., faces) and create a new SF for matched closure. The caller must ensure
313 // that both the donor (root) face and the periodic (leaf) face have consistent orientation, meaning that their closures
314 // are isomorphic. It may be useful/necessary for this restriction to be loosened.
315 //
316 // Output Arguments:
317 //
318 // + closure_sf - augmented point SF (see `DMGetPointSF()`) that includes the faces and all points in its closure. This
319 //   can be used to create a global section and section SF.
320 // - is_points - index set for just the points in the closure of `face_sf`. These may be used to apply an affine
321 //   transformation to periodic dofs; see DMPeriodicCoordinateSetUp_Internal().
322 //
323 static PetscErrorCode DMPlexCreateIsoperiodicPointSF_Private(DM dm, PetscSF face_sf, PetscSF *closure_sf, IS *is_points)
324 {
325   MPI_Comm           comm;
326   PetscInt           nroots, nleaves, npoints;
327   const PetscInt    *filocal, *pilocal;
328   const PetscSFNode *firemote, *piremote;
329   PetscMPIInt        rank;
330   PetscSF            point_sf;
331 
332   PetscFunctionBegin;
333   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
334   PetscCallMPI(MPI_Comm_rank(comm, &rank));
335   PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, &firemote));
336   PetscCall(DMGetPointSF(dm, &point_sf)); // Point SF has remote points
337   PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, &pilocal, &piremote));
338   PetscInt *rootdata, *leafdata;
339   PetscCall(PetscCalloc2(2 * nroots, &rootdata, 2 * nroots, &leafdata));
340   for (PetscInt i = 0; i < nleaves; i++) {
341     PetscInt point = filocal[i], cl_size, *closure = NULL;
342     PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
343     leafdata[point] = cl_size - 1;
344     PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
345   }
346   PetscCall(PetscSFReduceBegin(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
347   PetscCall(PetscSFReduceEnd(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
348 
349   PetscInt root_offset = 0;
350   for (PetscInt p = 0; p < nroots; p++) {
351     const PetscInt *donor_dof = rootdata + nroots;
352     if (donor_dof[p] == 0) {
353       rootdata[2 * p]     = -1;
354       rootdata[2 * p + 1] = -1;
355       continue;
356     }
357     PetscInt  cl_size;
358     PetscInt *closure = NULL;
359     PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
360     // cl_size - 1 = points not including self
361     PetscAssert(donor_dof[p] == cl_size - 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Reduced leaf cone sizes do not match root cone sizes");
362     rootdata[2 * p]     = root_offset;
363     rootdata[2 * p + 1] = cl_size - 1;
364     root_offset += cl_size - 1;
365     PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
366   }
367   PetscCall(PetscSFBcastBegin(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
368   PetscCall(PetscSFBcastEnd(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
369   // Count how many leaves we need to communicate the closures
370   PetscInt leaf_offset = 0;
371   for (PetscInt i = 0; i < nleaves; i++) {
372     PetscInt point = filocal[i];
373     if (leafdata[2 * point + 1] < 0) continue;
374     leaf_offset += leafdata[2 * point + 1];
375   }
376 
377   PetscSFNode *closure_leaf;
378   PetscCall(PetscMalloc1(leaf_offset, &closure_leaf));
379   leaf_offset = 0;
380   for (PetscInt i = 0; i < nleaves; i++) {
381     PetscInt point   = filocal[i];
382     PetscInt cl_size = leafdata[2 * point + 1];
383     if (cl_size < 0) continue;
384     for (PetscInt j = 0; j < cl_size; j++) {
385       closure_leaf[leaf_offset].rank  = firemote[i].rank;
386       closure_leaf[leaf_offset].index = leafdata[2 * point] + j;
387       leaf_offset++;
388     }
389   }
390 
391   PetscSF sf_closure;
392   PetscCall(PetscSFCreate(comm, &sf_closure));
393   PetscCall(PetscSFSetGraph(sf_closure, root_offset, leaf_offset, NULL, PETSC_USE_POINTER, closure_leaf, PETSC_OWN_POINTER));
394 
395   // Pack root buffer with owner for every point in the root cones
396   PetscSFNode *donor_closure;
397   PetscCall(PetscCalloc1(root_offset, &donor_closure));
398   root_offset = 0;
399   for (PetscInt p = 0; p < nroots; p++) {
400     if (rootdata[2 * p] < 0) continue;
401     PetscInt  cl_size;
402     PetscInt *closure = NULL;
403     PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
404     for (PetscInt j = 1; j < cl_size; j++) {
405       PetscInt c = closure[2 * j];
406       if (pilocal) {
407         PetscInt found = -1;
408         if (npoints > 0) PetscCall(PetscFindInt(c, npoints, pilocal, &found));
409         if (found >= 0) {
410           donor_closure[root_offset++] = piremote[found];
411           continue;
412         }
413       }
414       // we own c
415       donor_closure[root_offset].rank  = rank;
416       donor_closure[root_offset].index = c;
417       root_offset++;
418     }
419     PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
420   }
421 
422   PetscSFNode *leaf_donor_closure;
423   PetscCall(PetscMalloc1(leaf_offset, &leaf_donor_closure));
424   PetscCall(PetscSFBcastBegin(sf_closure, MPIU_2INT, donor_closure, leaf_donor_closure, MPI_REPLACE));
425   PetscCall(PetscSFBcastEnd(sf_closure, MPIU_2INT, donor_closure, leaf_donor_closure, MPI_REPLACE));
426   PetscCall(PetscSFDestroy(&sf_closure));
427   PetscCall(PetscFree(donor_closure));
428 
429   PetscSFNode *new_iremote;
430   PetscCall(PetscCalloc1(nroots, &new_iremote));
431   for (PetscInt i = 0; i < nroots; i++) new_iremote[i].rank = -1;
432   // Walk leaves and match vertices
433   leaf_offset = 0;
434   for (PetscInt i = 0; i < nleaves; i++) {
435     PetscInt  point   = filocal[i], cl_size;
436     PetscInt *closure = NULL;
437     PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
438     for (PetscInt j = 1; j < cl_size; j++) { // TODO: should we send donor edge orientations so we can flip for consistency?
439       PetscInt    c  = closure[2 * j];
440       PetscSFNode lc = leaf_donor_closure[leaf_offset];
441       // printf("[%d] face %d.%d: %d ?-- (%d,%d)\n", rank, point, j, c, lc.rank, lc.index);
442       if (new_iremote[c].rank == -1) {
443         new_iremote[c] = lc;
444       } else PetscCheck(new_iremote[c].rank == lc.rank && new_iremote[c].index == lc.index, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Mismatched cone ordering between faces");
445       leaf_offset++;
446     }
447     PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
448   }
449   PetscCall(PetscFree(leaf_donor_closure));
450 
451   // Include face points in closure SF
452   for (PetscInt i = 0; i < nleaves; i++) new_iremote[filocal[i]] = firemote[i];
453   // consolidate leaves
454   PetscInt num_new_leaves = 0;
455   for (PetscInt i = 0; i < nroots; i++) {
456     if (new_iremote[i].rank == -1) continue;
457     new_iremote[num_new_leaves] = new_iremote[i];
458     leafdata[num_new_leaves]    = i;
459     num_new_leaves++;
460   }
461   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, num_new_leaves, leafdata, PETSC_COPY_VALUES, is_points));
462 
463   PetscSF csf;
464   PetscCall(PetscSFCreate(comm, &csf));
465   PetscCall(PetscSFSetGraph(csf, nroots, num_new_leaves, leafdata, PETSC_COPY_VALUES, new_iremote, PETSC_COPY_VALUES));
466   PetscCall(PetscFree(new_iremote)); // copy and delete because new_iremote is longer than it needs to be
467   PetscCall(PetscFree2(rootdata, leafdata));
468 
469   if (npoints < 0) { // empty point_sf
470     *closure_sf = csf;
471   } else {
472     PetscCall(PetscSFMerge(point_sf, csf, closure_sf));
473     PetscCall(PetscSFDestroy(&csf));
474   }
475   PetscCall(PetscObjectSetName((PetscObject)*closure_sf, "Composed Periodic Points"));
476   PetscFunctionReturn(PETSC_SUCCESS);
477 }
478 
479 static PetscErrorCode DMGetIsoperiodicPointSF_Plex(DM dm, PetscSF *sf)
480 {
481   DM_Plex *plex = (DM_Plex *)dm->data;
482 
483   PetscFunctionBegin;
484   if (!plex->periodic.composed_sf) PetscCall(DMPlexCreateIsoperiodicPointSF_Private(dm, plex->periodic.face_sfs[0], &plex->periodic.composed_sf, &plex->periodic.periodic_points));
485   if (sf) *sf = plex->periodic.composed_sf;
486   PetscFunctionReturn(PETSC_SUCCESS);
487 }
488 
489 PetscErrorCode DMPlexMigrateIsoperiodicFaceSF_Internal(DM old_dm, DM dm, PetscSF sf_migration)
490 {
491   DM_Plex    *plex = (DM_Plex *)old_dm->data;
492   PetscSF     sf_point;
493   PetscMPIInt rank;
494 
495   PetscFunctionBegin;
496   if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
497   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
498   PetscCall(DMGetPointSF(dm, &sf_point));
499   PetscInt           old_npoints, new_npoints, old_nleaf, new_nleaf, point_nleaf;
500   PetscSFNode       *new_leafdata, *rootdata, *leafdata;
501   const PetscInt    *old_local, *point_local;
502   const PetscSFNode *old_remote, *point_remote;
503   PetscCall(PetscSFGetGraph(plex->periodic.face_sfs[0], &old_npoints, &old_nleaf, &old_local, &old_remote));
504   PetscCall(PetscSFGetGraph(sf_migration, NULL, &new_nleaf, NULL, NULL));
505   PetscCall(PetscSFGetGraph(sf_point, &new_npoints, &point_nleaf, &point_local, &point_remote));
506   PetscAssert(new_nleaf == new_npoints, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected migration leaf space to match new point root space");
507   PetscCall(PetscMalloc3(old_npoints, &rootdata, old_npoints, &leafdata, new_npoints, &new_leafdata));
508 
509   // Fill new_leafdata with new owners of all points
510   for (PetscInt i = 0; i < new_npoints; i++) {
511     new_leafdata[i].rank  = rank;
512     new_leafdata[i].index = i;
513   }
514   for (PetscInt i = 0; i < point_nleaf; i++) {
515     PetscInt j      = point_local[i];
516     new_leafdata[j] = point_remote[i];
517   }
518   // REPLACE is okay because every leaf agrees about the new owners
519   PetscCall(PetscSFReduceBegin(sf_migration, MPIU_2INT, new_leafdata, rootdata, MPI_REPLACE));
520   PetscCall(PetscSFReduceEnd(sf_migration, MPIU_2INT, new_leafdata, rootdata, MPI_REPLACE));
521   // rootdata now contains the new owners
522 
523   // Send to leaves of old space
524   for (PetscInt i = 0; i < old_npoints; i++) {
525     leafdata[i].rank  = -1;
526     leafdata[i].index = -1;
527   }
528   PetscCall(PetscSFBcastBegin(plex->periodic.face_sfs[0], MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
529   PetscCall(PetscSFBcastEnd(plex->periodic.face_sfs[0], MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
530 
531   // Send to new leaf space
532   PetscCall(PetscSFBcastBegin(sf_migration, MPIU_2INT, leafdata, new_leafdata, MPI_REPLACE));
533   PetscCall(PetscSFBcastEnd(sf_migration, MPIU_2INT, leafdata, new_leafdata, MPI_REPLACE));
534 
535   PetscInt     nface = 0, *new_local;
536   PetscSFNode *new_remote;
537   for (PetscInt i = 0; i < new_npoints; i++) nface += (new_leafdata[i].rank >= 0);
538   PetscCall(PetscMalloc1(nface, &new_local));
539   PetscCall(PetscMalloc1(nface, &new_remote));
540   nface = 0;
541   for (PetscInt i = 0; i < new_npoints; i++) {
542     if (new_leafdata[i].rank == -1) continue;
543     new_local[nface]  = i;
544     new_remote[nface] = new_leafdata[i];
545     nface++;
546   }
547   PetscCall(PetscFree3(rootdata, leafdata, new_leafdata));
548   PetscSF sf_face;
549   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &sf_face));
550   PetscCall(PetscSFSetGraph(sf_face, new_npoints, nface, new_local, PETSC_OWN_POINTER, new_remote, PETSC_OWN_POINTER));
551   PetscCall(PetscObjectSetName((PetscObject)sf_face, "Migrated Isoperiodic Faces"));
552   PetscCall(DMPlexSetIsoperiodicFaceSF(dm, 1, &sf_face));
553   PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, 1, (PetscScalar *)plex->periodic.transform));
554   PetscCall(PetscSFDestroy(&sf_face));
555   PetscFunctionReturn(PETSC_SUCCESS);
556 }
557 
558 PetscErrorCode DMPeriodicCoordinateSetUp_Internal(DM dm)
559 {
560   DM_Plex *plex = (DM_Plex *)dm->data;
561 
562   PetscFunctionBegin;
563   if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
564   PetscCall(DMGetIsoperiodicPointSF_Plex(dm, NULL));
565   PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex));
566 
567   PetscInt dim;
568   PetscCall(DMGetDimension(dm, &dim));
569   size_t count;
570   IS     isdof;
571   {
572     PetscInt        npoints;
573     const PetscInt *points;
574     IS              is = plex->periodic.periodic_points;
575     PetscSegBuffer  seg;
576     PetscSection    section;
577     PetscCall(DMGetLocalSection(dm, &section));
578     PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 32, &seg));
579     PetscCall(ISGetSize(is, &npoints));
580     PetscCall(ISGetIndices(is, &points));
581     for (PetscInt i = 0; i < npoints; i++) {
582       PetscInt point = points[i], off, dof;
583       PetscCall(PetscSectionGetOffset(section, point, &off));
584       PetscCall(PetscSectionGetDof(section, point, &dof));
585       PetscAssert(dof % dim == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected dof %" PetscInt_FMT " not divisible by dimension %" PetscInt_FMT, dof, dim);
586       for (PetscInt j = 0; j < dof / dim; j++) {
587         PetscInt *slot;
588         PetscCall(PetscSegBufferGetInts(seg, 1, &slot));
589         *slot = off / dim + j;
590       }
591     }
592     PetscInt *ind;
593     PetscCall(PetscSegBufferGetSize(seg, &count));
594     PetscCall(PetscSegBufferExtractAlloc(seg, &ind));
595     PetscCall(PetscSegBufferDestroy(&seg));
596     PetscCall(ISCreateBlock(PETSC_COMM_SELF, dim, count, ind, PETSC_OWN_POINTER, &isdof));
597   }
598   Vec        L, P;
599   VecType    vec_type;
600   VecScatter scatter;
601   PetscCall(DMGetLocalVector(dm, &L));
602   PetscCall(VecCreate(PETSC_COMM_SELF, &P));
603   PetscCall(VecSetSizes(P, count * dim, count * dim));
604   PetscCall(VecGetType(L, &vec_type));
605   PetscCall(VecSetType(P, vec_type));
606   PetscCall(VecScatterCreate(P, NULL, L, isdof, &scatter));
607   PetscCall(DMRestoreLocalVector(dm, &L));
608   PetscCall(ISDestroy(&isdof));
609 
610   {
611     PetscScalar *x;
612     PetscCall(VecGetArrayWrite(P, &x));
613     for (PetscInt i = 0; i < (PetscInt)count; i++) {
614       for (PetscInt j = 0; j < dim; j++) x[i * dim + j] = plex->periodic.transform[0][j][3];
615     }
616     PetscCall(VecRestoreArrayWrite(P, &x));
617   }
618 
619   dm->periodic.affine_to_local = scatter;
620   dm->periodic.affine          = P;
621   PetscCall(DMGlobalToLocalHookAdd(dm, NULL, DMCoordAddPeriodicOffsets_Private, NULL));
622   PetscFunctionReturn(PETSC_SUCCESS);
623 }
624 
625 // We'll just orient all the edges, though only periodic boundary edges need orientation
626 static PetscErrorCode DMPlexOrientPositiveEdges_Private(DM dm)
627 {
628   PetscInt dim, eStart, eEnd;
629 
630   PetscFunctionBegin;
631   PetscCall(DMGetDimension(dm, &dim));
632   if (dim < 3) PetscFunctionReturn(PETSC_SUCCESS); // not necessary
633   PetscCall(DMPlexGetDepthStratum(dm, 1, &eStart, &eEnd));
634   for (PetscInt e = eStart; e < eEnd; e++) {
635     const PetscInt *cone;
636     PetscCall(DMPlexGetCone(dm, e, &cone));
637     if (cone[0] > cone[1]) PetscCall(DMPlexOrientPoint(dm, e, -1));
638   }
639   PetscFunctionReturn(PETSC_SUCCESS);
640 }
641 
642 PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Internal(DM dm, PetscInt dim, const PetscInt faces[], const PetscReal lower[], const PetscReal upper[], const DMBoundaryType periodicity[], PetscBool interpolate)
643 {
644   PetscInt  eextent[3] = {1, 1, 1}, vextent[3] = {1, 1, 1};
645   const Ijk closure_1[] = {
646     {0, 0, 0},
647     {1, 0, 0},
648   };
649   const Ijk closure_2[] = {
650     {0, 0, 0},
651     {1, 0, 0},
652     {1, 1, 0},
653     {0, 1, 0},
654   };
655   const Ijk closure_3[] = {
656     {0, 0, 0},
657     {0, 1, 0},
658     {1, 1, 0},
659     {1, 0, 0},
660     {0, 0, 1},
661     {1, 0, 1},
662     {1, 1, 1},
663     {0, 1, 1},
664   };
665   const Ijk *const closure_dim[] = {NULL, closure_1, closure_2, closure_3};
666   // This must be kept consistent with DMPlexCreateCubeMesh_Internal
667   const PetscInt        face_marker_1[]   = {1, 2};
668   const PetscInt        face_marker_2[]   = {4, 2, 1, 3};
669   const PetscInt        face_marker_3[]   = {6, 5, 3, 4, 1, 2};
670   const PetscInt *const face_marker_dim[] = {NULL, face_marker_1, face_marker_2, face_marker_3};
671   // Orient faces so the normal is in the positive axis and the first vertex is the one closest to zero.
672   // These orientations can be determined by examining cones of a reference quad and hex element.
673   const PetscInt        face_orient_1[]   = {0, 0};
674   const PetscInt        face_orient_2[]   = {-1, 0, 0, -1};
675   const PetscInt        face_orient_3[]   = {-2, 0, -2, 1, -2, 0};
676   const PetscInt *const face_orient_dim[] = {NULL, face_orient_1, face_orient_2, face_orient_3};
677 
678   PetscFunctionBegin;
679   PetscAssertPointer(dm, 1);
680   PetscValidLogicalCollectiveInt(dm, dim, 2);
681   PetscCall(DMSetDimension(dm, dim));
682   PetscMPIInt rank, size;
683   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size));
684   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
685   for (PetscInt i = 0; i < dim; i++) {
686     eextent[i] = faces[i];
687     vextent[i] = faces[i] + 1;
688   }
689   ZLayout layout;
690   PetscCall(ZLayoutCreate(size, eextent, vextent, &layout));
691   PetscZSet vset; // set of all vertices in the closure of the owned elements
692   PetscCall(PetscZSetCreate(&vset));
693   PetscInt local_elems = 0;
694   for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
695     Ijk loc = ZCodeSplit(z);
696     if (IjkActive(layout.vextent, loc)) PetscCall(PetscZSetAdd(vset, z));
697     else {
698       z += ZStepOct(z);
699       continue;
700     }
701     if (IjkActive(layout.eextent, loc)) {
702       local_elems++;
703       // Add all neighboring vertices to set
704       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
705         Ijk   inc  = closure_dim[dim][n];
706         Ijk   nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
707         ZCode v    = ZEncode(nloc);
708         PetscCall(PetscZSetAdd(vset, v));
709       }
710     }
711   }
712   PetscInt local_verts, off = 0;
713   ZCode   *vert_z;
714   PetscCall(PetscZSetGetSize(vset, &local_verts));
715   PetscCall(PetscMalloc1(local_verts, &vert_z));
716   PetscCall(PetscZSetGetElems(vset, &off, vert_z));
717   PetscCall(PetscZSetDestroy(&vset));
718   // ZCode is unsigned for bitwise convenience, but highest bit should never be set, so can interpret as signed
719   PetscCall(PetscSortInt64(local_verts, (PetscInt64 *)vert_z));
720 
721   PetscCall(DMPlexSetChart(dm, 0, local_elems + local_verts));
722   for (PetscInt e = 0; e < local_elems; e++) PetscCall(DMPlexSetConeSize(dm, e, PetscPowInt(2, dim)));
723   PetscCall(DMSetUp(dm));
724   {
725     PetscInt e = 0;
726     for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
727       Ijk loc = ZCodeSplit(z);
728       if (!IjkActive(layout.eextent, loc)) {
729         z += ZStepOct(z);
730         continue;
731       }
732       PetscInt cone[8], orient[8] = {0};
733       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
734         Ijk      inc  = closure_dim[dim][n];
735         Ijk      nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
736         ZCode    v    = ZEncode(nloc);
737         PetscInt ci   = ZCodeFind(v, local_verts, vert_z);
738         PetscAssert(ci >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Did not find neighbor vertex in set");
739         cone[n] = local_elems + ci;
740       }
741       PetscCall(DMPlexSetCone(dm, e, cone));
742       PetscCall(DMPlexSetConeOrientation(dm, e, orient));
743       e++;
744     }
745   }
746 
747   PetscCall(DMPlexSymmetrize(dm));
748   PetscCall(DMPlexStratify(dm));
749 
750   { // Create point SF
751     PetscSF sf;
752     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &sf));
753     PetscInt owned_verts = ZCodeFind(layout.zstarts[rank + 1], local_verts, vert_z);
754     if (owned_verts < 0) owned_verts = -(owned_verts + 1); // We don't care whether the key was found
755     PetscInt     num_ghosts = local_verts - owned_verts;   // Due to sorting, owned vertices always come first
756     PetscInt    *local_ghosts;
757     PetscSFNode *ghosts;
758     PetscCall(PetscMalloc1(num_ghosts, &local_ghosts));
759     PetscCall(PetscMalloc1(num_ghosts, &ghosts));
760     for (PetscInt i = 0; i < num_ghosts;) {
761       ZCode    z           = vert_z[owned_verts + i];
762       PetscInt remote_rank = ZCodeFind(z, size + 1, layout.zstarts), remote_count = 0;
763       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
764       // We have a new remote rank; find all the ghost indices (which are contiguous in vert_z)
765 
766       // Count the elements on remote_rank
767       PetscInt remote_elem = ZLayoutElementsOnRank(&layout, remote_rank);
768 
769       // Traverse vertices and make ghost links
770       for (ZCode rz = layout.zstarts[remote_rank]; rz < layout.zstarts[remote_rank + 1]; rz++) {
771         Ijk loc = ZCodeSplit(rz);
772         if (rz == z) {
773           local_ghosts[i] = local_elems + owned_verts + i;
774           ghosts[i].rank  = remote_rank;
775           ghosts[i].index = remote_elem + remote_count;
776           i++;
777           if (i == num_ghosts) break;
778           z = vert_z[owned_verts + i];
779         }
780         if (IjkActive(layout.vextent, loc)) remote_count++;
781         else rz += ZStepOct(rz);
782       }
783     }
784     PetscCall(PetscSFSetGraph(sf, local_elems + local_verts, num_ghosts, local_ghosts, PETSC_OWN_POINTER, ghosts, PETSC_OWN_POINTER));
785     PetscCall(PetscObjectSetName((PetscObject)sf, "SFC Point SF"));
786     PetscCall(DMSetPointSF(dm, sf));
787     PetscCall(PetscSFDestroy(&sf));
788   }
789   {
790     Vec          coordinates;
791     PetscScalar *coords;
792     PetscSection coord_section;
793     PetscInt     coord_size;
794     PetscCall(DMGetCoordinateSection(dm, &coord_section));
795     PetscCall(PetscSectionSetNumFields(coord_section, 1));
796     PetscCall(PetscSectionSetFieldComponents(coord_section, 0, dim));
797     PetscCall(PetscSectionSetChart(coord_section, local_elems, local_elems + local_verts));
798     for (PetscInt v = 0; v < local_verts; v++) {
799       PetscInt point = local_elems + v;
800       PetscCall(PetscSectionSetDof(coord_section, point, dim));
801       PetscCall(PetscSectionSetFieldDof(coord_section, point, 0, dim));
802     }
803     PetscCall(PetscSectionSetUp(coord_section));
804     PetscCall(PetscSectionGetStorageSize(coord_section, &coord_size));
805     PetscCall(VecCreate(PETSC_COMM_SELF, &coordinates));
806     PetscCall(PetscObjectSetName((PetscObject)coordinates, "coordinates"));
807     PetscCall(VecSetSizes(coordinates, coord_size, PETSC_DETERMINE));
808     PetscCall(VecSetBlockSize(coordinates, dim));
809     PetscCall(VecSetType(coordinates, VECSTANDARD));
810     PetscCall(VecGetArray(coordinates, &coords));
811     for (PetscInt v = 0; v < local_verts; v++) {
812       Ijk loc             = ZCodeSplit(vert_z[v]);
813       coords[v * dim + 0] = lower[0] + loc.i * (upper[0] - lower[0]) / layout.eextent.i;
814       if (dim > 1) coords[v * dim + 1] = lower[1] + loc.j * (upper[1] - lower[1]) / layout.eextent.j;
815       if (dim > 2) coords[v * dim + 2] = lower[2] + loc.k * (upper[2] - lower[2]) / layout.eextent.k;
816     }
817     PetscCall(VecRestoreArray(coordinates, &coords));
818     PetscCall(DMSetCoordinatesLocal(dm, coordinates));
819     PetscCall(VecDestroy(&coordinates));
820   }
821   if (interpolate) {
822     PetscCall(DMPlexInterpolateInPlace_Internal(dm));
823     // It's currently necessary to orient the donor and periodic edges consistently. An easy way to ensure that is ot
824     // give all edges positive orientation. Since vertices are created in Z-order, all ranks will agree about the
825     // ordering cone[0] < cone[1]. This is overkill and it would be nice to remove this preparation and make
826     // DMPlexCreateIsoperiodicClosureSF_Private() more resilient, so it fixes any inconsistent orientations. That might
827     // be needed in a general CGNS reader, for example.
828     PetscCall(DMPlexOrientPositiveEdges_Private(dm));
829 
830     DMLabel label;
831     PetscCall(DMCreateLabel(dm, "Face Sets"));
832     PetscCall(DMGetLabel(dm, "Face Sets", &label));
833     PetscSegBuffer per_faces[3], donor_face_closure[3], my_donor_faces[3];
834     for (PetscInt i = 0; i < 3; i++) {
835       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &per_faces[i]));
836       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &my_donor_faces[i]));
837       PetscCall(PetscSegBufferCreate(sizeof(ZCode), 64 * PetscPowInt(2, dim), &donor_face_closure[i]));
838     }
839     PetscInt fStart, fEnd, vStart, vEnd;
840     PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
841     PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
842     for (PetscInt f = fStart; f < fEnd; f++) {
843       PetscInt npoints, *points = NULL, num_fverts = 0, fverts[8];
844       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
845       PetscInt bc_count[6] = {0};
846       for (PetscInt i = 0; i < npoints; i++) {
847         PetscInt p = points[2 * i];
848         if (p < vStart || vEnd <= p) continue;
849         fverts[num_fverts++] = p;
850         Ijk loc              = ZCodeSplit(vert_z[p - vStart]);
851         // Convention here matches DMPlexCreateCubeMesh_Internal
852         bc_count[0] += loc.i == 0;
853         bc_count[1] += loc.i == layout.vextent.i - 1;
854         bc_count[2] += loc.j == 0;
855         bc_count[3] += loc.j == layout.vextent.j - 1;
856         bc_count[4] += loc.k == 0;
857         bc_count[5] += loc.k == layout.vextent.k - 1;
858       }
859       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
860       for (PetscInt bc = 0, bc_match = 0; bc < 2 * dim; bc++) {
861         if (bc_count[bc] == PetscPowInt(2, dim - 1)) {
862           PetscCall(DMPlexOrientPoint(dm, f, face_orient_dim[dim][bc]));
863           if (periodicity[bc / 2] == DM_BOUNDARY_PERIODIC) {
864             PetscInt *put;
865             if (bc % 2 == 0) { // donor face; no label
866               PetscCall(PetscSegBufferGet(my_donor_faces[bc / 2], 1, &put));
867               *put = f;
868             } else { // periodic face
869               PetscCall(PetscSegBufferGet(per_faces[bc / 2], 1, &put));
870               *put = f;
871               ZCode *zput;
872               PetscCall(PetscSegBufferGet(donor_face_closure[bc / 2], num_fverts, &zput));
873               for (PetscInt i = 0; i < num_fverts; i++) {
874                 Ijk loc = ZCodeSplit(vert_z[fverts[i] - vStart]);
875                 switch (bc / 2) {
876                 case 0:
877                   loc.i = 0;
878                   break;
879                 case 1:
880                   loc.j = 0;
881                   break;
882                 case 2:
883                   loc.k = 0;
884                   break;
885                 }
886                 *zput++ = ZEncode(loc);
887               }
888             }
889             continue;
890           }
891           PetscAssert(bc_match == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Face matches multiple face sets");
892           PetscCall(DMLabelSetValue(label, f, face_marker_dim[dim][bc]));
893           bc_match++;
894         }
895       }
896     }
897     // Ensure that the Coordinate DM has our new boundary labels
898     DM cdm;
899     PetscCall(DMGetCoordinateDM(dm, &cdm));
900     PetscCall(DMCopyLabels(dm, cdm, PETSC_COPY_VALUES, PETSC_FALSE, DM_COPY_LABELS_FAIL));
901     if (periodicity[0] == DM_BOUNDARY_PERIODIC || (dim > 1 && periodicity[1] == DM_BOUNDARY_PERIODIC) || (dim > 2 && periodicity[2] == DM_BOUNDARY_PERIODIC)) {
902       PetscCall(DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(dm, &layout, vert_z, per_faces, lower, upper, periodicity, donor_face_closure, my_donor_faces));
903     }
904     for (PetscInt i = 0; i < 3; i++) {
905       PetscCall(PetscSegBufferDestroy(&per_faces[i]));
906       PetscCall(PetscSegBufferDestroy(&donor_face_closure[i]));
907       PetscCall(PetscSegBufferDestroy(&my_donor_faces[i]));
908     }
909   }
910   PetscCall(PetscFree(layout.zstarts));
911   PetscCall(PetscFree(vert_z));
912   PetscFunctionReturn(PETSC_SUCCESS);
913 }
914 
915 /*@
916   DMPlexSetIsoperiodicFaceSF - Express periodicity from an existing mesh
917 
918   Logically Collective
919 
920   Input Parameters:
921 + dm           - The `DMPLEX` on which to set periodicity
922 . num_face_sfs - Number of `PetscSF`s in `face_sfs`
923 - face_sfs     - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.
924 
925   Level: advanced
926 
927   Note:
928   One can use `-dm_plex_shape zbox` to use this mode of periodicity, wherein the periodic points are distinct both globally
929   and locally, but are paired when creating a global dof space.
930 
931 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexGetIsoperiodicFaceSF()`
932 @*/
933 PetscErrorCode DMPlexSetIsoperiodicFaceSF(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs)
934 {
935   DM_Plex *plex = (DM_Plex *)dm->data;
936 
937   PetscFunctionBegin;
938   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
939   if (face_sfs) PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex));
940   if (face_sfs == plex->periodic.face_sfs && num_face_sfs == plex->periodic.num_face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
941 
942   for (PetscInt i = 0; i < num_face_sfs; i++) PetscCall(PetscObjectReference((PetscObject)face_sfs[i]));
943 
944   if (plex->periodic.num_face_sfs > 0) {
945     for (PetscInt i = 0; i < plex->periodic.num_face_sfs; i++) PetscCall(PetscSFDestroy(&plex->periodic.face_sfs[i]));
946     PetscCall(PetscFree(plex->periodic.face_sfs));
947   }
948 
949   plex->periodic.num_face_sfs = num_face_sfs;
950   PetscCall(PetscCalloc1(num_face_sfs, &plex->periodic.face_sfs));
951   for (PetscInt i = 0; i < num_face_sfs; i++) plex->periodic.face_sfs[i] = face_sfs[i];
952 
953   DM cdm = dm->coordinates[0].dm; // Can't DMGetCoordinateDM because it automatically creates one
954   if (cdm) {
955     PetscCall(DMPlexSetIsoperiodicFaceSF(cdm, num_face_sfs, face_sfs));
956     if (face_sfs) cdm->periodic.setup = DMPeriodicCoordinateSetUp_Internal;
957   }
958   PetscFunctionReturn(PETSC_SUCCESS);
959 }
960 
961 /*@C
962   DMPlexGetIsoperiodicFaceSF - Obtain periodicity for a mesh
963 
964   Logically Collective
965 
966   Input Parameter:
967 . dm - The `DMPLEX` for which to obtain periodic relation
968 
969   Output Parameter:
970 + num_face_sfs - Number of `PetscSF`s in the array
971 - face_sfs     - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.
972 
973   Level: advanced
974 
975 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()`
976 @*/
977 PetscErrorCode DMPlexGetIsoperiodicFaceSF(DM dm, PetscInt *num_face_sfs, const PetscSF **face_sfs)
978 {
979   DM_Plex *plex = (DM_Plex *)dm->data;
980 
981   PetscFunctionBegin;
982   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
983   *face_sfs     = plex->periodic.face_sfs;
984   *num_face_sfs = plex->periodic.num_face_sfs;
985   PetscFunctionReturn(PETSC_SUCCESS);
986 }
987 
988 /*@C
989   DMPlexSetIsoperiodicFaceTransform - set geometric transform from donor to periodic points
990 
991   Logically Collective
992 
993   Input Parameters:
994 + dm - `DMPLEX` that has been configured with `DMPlexSetIsoperiodicFaceSF()`
995 . n  - Number of transforms in array
996 - t  - Array of 4x4 affine transformation basis.
997 
998   Level: advanced
999 
1000   Notes:
1001   Affine transforms are 4x4 matrices in which the leading 3x3 block expresses a rotation (or identity for no rotation),
1002   the last column contains a translation vector, and the bottom row is all zero except the last entry, which must always
1003   be 1. This representation is common in geometric modeling and allows affine transformations to be composed using
1004   simple matrix multiplication.
1005 
1006   Although the interface accepts a general affine transform, only affine translation is supported at present.
1007 
1008   Developer Notes:
1009   This interface should be replaced by making BasisTransform public, expanding it to support affine representations, and
1010   adding GPU implementations to apply the G2L/L2G transforms.
1011 
1012 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()`
1013 @*/
1014 PetscErrorCode DMPlexSetIsoperiodicFaceTransform(DM dm, PetscInt n, const PetscScalar *t)
1015 {
1016   DM_Plex *plex = (DM_Plex *)dm->data;
1017 
1018   PetscFunctionBegin;
1019   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1020   PetscCheck(n == plex->periodic.num_face_sfs, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Number of transforms (%" PetscInt_FMT ") must equal number of isoperiodc face SFs (%" PetscInt_FMT ")", n, plex->periodic.num_face_sfs);
1021 
1022   PetscCall(PetscMalloc1(n, &plex->periodic.transform));
1023   for (PetscInt i = 0; i < n; i++) {
1024     for (PetscInt j = 0; j < 4; j++) {
1025       for (PetscInt k = 0; k < 4; k++) {
1026         PetscCheck(j != k || t[i * 16 + j * 4 + k] == 1., PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Rotated transforms not supported");
1027         plex->periodic.transform[i][j][k] = t[i * 16 + j * 4 + k];
1028       }
1029     }
1030   }
1031   PetscFunctionReturn(PETSC_SUCCESS);
1032 }
1033