xref: /petsc/src/dm/impls/plex/plexsfc.c (revision 9ae3492b1588f8aa23f0d27a51413f308172c30d)
1 #include <petsc/private/dmpleximpl.h> /*I   "petscdmplex.h"   I*/
2 #include <petscsf.h>
3 #include <petsc/private/hashset.h>
4 
5 typedef uint64_t ZCode;
6 
7 PETSC_HASH_SET(ZSet, ZCode, PetscHash_UInt64, PetscHashEqual)
8 
9 typedef struct {
10   PetscInt i, j, k;
11 } Ijk;
12 
13 typedef struct {
14   Ijk         eextent;
15   Ijk         vextent;
16   PetscMPIInt comm_size;
17   ZCode      *zstarts;
18 } ZLayout;
19 
20 static unsigned ZCodeSplit1(ZCode z)
21 {
22   z = ((z & 01001001001001001) | ((z >> 2) & 02002002002002002) | ((z >> 4) & 04004004004004004));
23   z = (z | (z >> 6) | (z >> 12)) & 0000000777000000777;
24   z = (z | (z >> 18)) & 0777777;
25   return (unsigned)z;
26 }
27 
28 static ZCode ZEncode1(unsigned t)
29 {
30   ZCode z = t;
31   z       = (z | (z << 18)) & 0777000000777;
32   z       = (z | (z << 6) | (z << 12)) & 07007007007007007;
33   z       = (z | (z << 2) | (z << 4)) & 0111111111111111111;
34   return z;
35 }
36 
37 static Ijk ZCodeSplit(ZCode z)
38 {
39   Ijk c;
40   c.i = ZCodeSplit1(z >> 2);
41   c.j = ZCodeSplit1(z >> 1);
42   c.k = ZCodeSplit1(z >> 0);
43   return c;
44 }
45 
46 static ZCode ZEncode(Ijk c)
47 {
48   ZCode z = (ZEncode1(c.i) << 2) | (ZEncode1(c.j) << 1) | ZEncode1(c.k);
49   return z;
50 }
51 
52 static PetscBool IjkActive(Ijk extent, Ijk l)
53 {
54   if (l.i < extent.i && l.j < extent.j && l.k < extent.k) return PETSC_TRUE;
55   return PETSC_FALSE;
56 }
57 
58 // If z is not the base of an octet (last 3 bits 0), return 0.
59 //
60 // If z is the base of an octet, we recursively grow to the biggest structured octet. This is typically useful when a z
61 // is outside the domain and we wish to skip a (possibly recursively large) octet to find our next interesting point.
62 static ZCode ZStepOct(ZCode z)
63 {
64   if (PetscUnlikely(z == 0)) return 0; // Infinite loop below if z == 0
65   ZCode step = 07;
66   for (; (z & step) == 0; step = (step << 3) | 07) { }
67   return step >> 3;
68 }
69 
70 // Since element/vertex box extents are typically not equal powers of 2, Z codes that lie within the domain are not contiguous.
71 static PetscErrorCode ZLayoutCreate(PetscMPIInt size, const PetscInt eextent[3], const PetscInt vextent[3], ZLayout *layout)
72 {
73   PetscFunctionBegin;
74   layout->eextent.i = eextent[0];
75   layout->eextent.j = eextent[1];
76   layout->eextent.k = eextent[2];
77   layout->vextent.i = vextent[0];
78   layout->vextent.j = vextent[1];
79   layout->vextent.k = vextent[2];
80   layout->comm_size = size;
81   layout->zstarts   = NULL;
82   PetscCall(PetscMalloc1(size + 1, &layout->zstarts));
83 
84   PetscInt total_elems = eextent[0] * eextent[1] * eextent[2];
85   ZCode    z           = 0;
86   layout->zstarts[0]   = 0;
87   // This loop traverses all vertices in the global domain, so is worth making fast. We use ZStepBound
88   for (PetscMPIInt r = 0; r < size; r++) {
89     PetscInt elems_needed = (total_elems / size) + (total_elems % size > r), count;
90     for (count = 0; count < elems_needed; z++) {
91       ZCode skip = ZStepOct(z); // optimistically attempt a longer step
92       for (ZCode s = skip;; s >>= 3) {
93         Ijk trial = ZCodeSplit(z + s);
94         if (IjkActive(layout->eextent, trial)) {
95           while (count + s + 1 > (ZCode)elems_needed) s >>= 3; // Shrink the octet
96           count += s + 1;
97           z += s;
98           break;
99         }
100         if (s == 0) { // the whole skip octet is inactive
101           z += skip;
102           break;
103         }
104       }
105     }
106     // Pick up any extra vertices in the Z ordering before the next rank's first owned element.
107     //
108     // This leads to poorly balanced vertices when eextent is a power of 2, since all the fringe vertices end up
109     // on the last rank. A possible solution is to balance the Z-order vertices independently from the cells, which will
110     // result in a lot of element closures being remote. We could finish marking boundary conditions, then do a round of
111     // vertex ownership smoothing (which would reorder and redistribute vertices without touching element distribution).
112     // Another would be to have an analytic ownership criteria for vertices in the fringe veextent - eextent. This would
113     // complicate the job of identifying an owner and its offset.
114     //
115     // The current recommended approach is to let `-dm_distribute 1` (default) resolve vertex ownership. This is
116     // *mandatory* with isoperiodicity (except in special cases) to remove standed vertices from local spaces. Here's
117     // the issue:
118     //
119     // Consider this partition on rank 0 (left) and rank 1.
120     //
121     //    4 --------  5 -- 14 --10 -- 21 --11
122     //                |          |          |
123     // 7 -- 16 --  8  |          |          |
124     // |           |  3 -------  7 -------  9
125     // |           |             |          |
126     // 4 --------  6 ------ 10   |          |
127     // |           |         |   6 -- 16 -- 8
128     // |           |         |
129     // 3 ---11---  5 --18--  9
130     //
131     // The periodic face SF looks like
132     // [0] Number of roots=21, leaves=1, remote ranks=1
133     // [0] 16 <- (0,11)
134     // [1] Number of roots=22, leaves=2, remote ranks=2
135     // [1] 14 <- (0,18)
136     // [1] 21 <- (1,16)
137     //
138     // In handling face (0,16), rank 0 learns that (0,7) and (0,8) map to (0,3) and (0,5) respectively, thus we won't use
139     // the point SF links to (1,4) and (1,5). Rank 1 learns about the periodic mapping of (1,5) while handling face
140     // (1,14), but never learns that vertex (1,4) has been mapped to (0,3) by face (0,16).
141     //
142     // We can relatively easily inform vertex (1,4) of this mapping, but it stays in rank 1's local space despite not
143     // being in the closure and thus not being contributed to. This would be mostly harmless except that some viewer
144     // routines expect all local points to be somehow significant. It is not easy to analytically remove the (1,4)
145     // vertex because the point SF and isoperiodic face SF would need to be updated to account for removal of the
146     // stranded vertices.
147     for (; z <= ZEncode(layout->vextent); z++) {
148       Ijk loc = ZCodeSplit(z);
149       if (IjkActive(layout->eextent, loc)) break;
150       z += ZStepOct(z);
151     }
152     layout->zstarts[r + 1] = z;
153   }
154   layout->zstarts[size] = ZEncode(layout->vextent);
155   PetscFunctionReturn(PETSC_SUCCESS);
156 }
157 
158 static PetscInt ZLayoutElementsOnRank(const ZLayout *layout, PetscMPIInt rank)
159 {
160   PetscInt remote_elem = 0;
161   for (ZCode rz = layout->zstarts[rank]; rz < layout->zstarts[rank + 1]; rz++) {
162     Ijk loc = ZCodeSplit(rz);
163     if (IjkActive(layout->eextent, loc)) remote_elem++;
164     else rz += ZStepOct(rz);
165   }
166   return remote_elem;
167 }
168 
169 static PetscInt ZCodeFind(ZCode key, PetscInt n, const ZCode X[])
170 {
171   PetscInt lo = 0, hi = n;
172 
173   if (n == 0) return -1;
174   while (hi - lo > 1) {
175     PetscInt mid = lo + (hi - lo) / 2;
176     if (key < X[mid]) hi = mid;
177     else lo = mid;
178   }
179   return key == X[lo] ? lo : -(lo + (key > X[lo]) + 1);
180 }
181 
182 static PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(DM dm, const ZLayout *layout, const ZCode *vert_z, PetscSegBuffer per_faces[3], const PetscReal *lower, const PetscReal *upper, const DMBoundaryType *periodicity, PetscSegBuffer donor_face_closure[3], PetscSegBuffer my_donor_faces[3])
183 {
184   MPI_Comm    comm;
185   PetscInt    dim, vStart, vEnd;
186   PetscMPIInt size;
187   PetscSF     face_sfs[3];
188   PetscScalar transforms[3][4][4] = {{{0}}};
189 
190   PetscFunctionBegin;
191   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
192   PetscCallMPI(MPI_Comm_size(comm, &size));
193   PetscCall(DMGetDimension(dm, &dim));
194   const PetscInt csize = PetscPowInt(2, dim - 1);
195   PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
196 
197   PetscInt num_directions = 0;
198   for (PetscInt direction = 0; direction < dim; direction++) {
199     size_t       num_faces;
200     PetscInt    *faces;
201     ZCode       *donor_verts, *donor_minz;
202     PetscSFNode *leaf;
203 
204     if (periodicity[direction] != DM_BOUNDARY_PERIODIC) continue;
205     PetscCall(PetscSegBufferGetSize(per_faces[direction], &num_faces));
206     PetscCall(PetscSegBufferExtractInPlace(per_faces[direction], &faces));
207     PetscCall(PetscSegBufferExtractInPlace(donor_face_closure[direction], &donor_verts));
208     PetscCall(PetscMalloc1(num_faces, &donor_minz));
209     PetscCall(PetscMalloc1(num_faces, &leaf));
210     for (PetscInt i = 0; i < (PetscInt)num_faces; i++) {
211       ZCode minz = donor_verts[i * csize];
212       for (PetscInt j = 1; j < csize; j++) minz = PetscMin(minz, donor_verts[i * csize + j]);
213       donor_minz[i] = minz;
214     }
215     {
216       PetscBool sorted;
217       PetscCall(PetscSortedInt64(num_faces, (const PetscInt64 *)donor_minz, &sorted));
218       // If a donor vertex were chosen to broker multiple faces, we would have a logic error.
219       // Checking for sorting is a cheap check that there are no duplicates.
220       PetscCheck(sorted, PETSC_COMM_SELF, PETSC_ERR_PLIB, "minz not sorted; possible duplicates not checked");
221     }
222     for (PetscInt i = 0; i < (PetscInt)num_faces;) {
223       ZCode    z           = donor_minz[i];
224       PetscInt remote_rank = ZCodeFind(z, size + 1, layout->zstarts), remote_count = 0;
225       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
226       // Process all the vertices on this rank
227       for (ZCode rz = layout->zstarts[remote_rank]; rz < layout->zstarts[remote_rank + 1]; rz++) {
228         Ijk loc = ZCodeSplit(rz);
229         if (rz == z) {
230           leaf[i].rank  = remote_rank;
231           leaf[i].index = remote_count;
232           i++;
233           if (i == (PetscInt)num_faces) break;
234           z = donor_minz[i];
235         }
236         if (IjkActive(layout->vextent, loc)) remote_count++;
237       }
238     }
239     PetscCall(PetscFree(donor_minz));
240     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &face_sfs[num_directions]));
241     PetscCall(PetscSFSetGraph(face_sfs[num_directions], vEnd - vStart, num_faces, NULL, PETSC_USE_POINTER, leaf, PETSC_USE_POINTER));
242     const PetscInt *my_donor_degree;
243     PetscCall(PetscSFComputeDegreeBegin(face_sfs[num_directions], &my_donor_degree));
244     PetscCall(PetscSFComputeDegreeEnd(face_sfs[num_directions], &my_donor_degree));
245     PetscInt num_multiroots = 0;
246     for (PetscInt i = 0; i < vEnd - vStart; i++) {
247       num_multiroots += my_donor_degree[i];
248       if (my_donor_degree[i] == 0) continue;
249       PetscAssert(my_donor_degree[i] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex has multiple faces");
250     }
251     PetscInt *my_donors, *donor_indices, *my_donor_indices;
252     size_t    num_my_donors;
253     PetscCall(PetscSegBufferGetSize(my_donor_faces[direction], &num_my_donors));
254     PetscCheck((PetscInt)num_my_donors == num_multiroots, PETSC_COMM_SELF, PETSC_ERR_SUP, "Donor request does not match expected donors");
255     PetscCall(PetscSegBufferExtractInPlace(my_donor_faces[direction], &my_donors));
256     PetscCall(PetscMalloc1(vEnd - vStart, &my_donor_indices));
257     for (PetscInt i = 0; i < (PetscInt)num_my_donors; i++) {
258       PetscInt f = my_donors[i];
259       PetscInt num_points, *points = NULL, minv = PETSC_MAX_INT;
260       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
261       for (PetscInt j = 0; j < num_points; j++) {
262         PetscInt p = points[2 * j];
263         if (p < vStart || vEnd <= p) continue;
264         minv = PetscMin(minv, p);
265       }
266       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
267       PetscAssert(my_donor_degree[minv - vStart] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex not requested");
268       my_donor_indices[minv - vStart] = f;
269     }
270     PetscCall(PetscMalloc1(num_faces, &donor_indices));
271     PetscCall(PetscSFBcastBegin(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
272     PetscCall(PetscSFBcastEnd(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
273     PetscCall(PetscFree(my_donor_indices));
274     // Modify our leafs so they point to donor faces instead of donor minz. Additionally, give them indices as faces.
275     for (PetscInt i = 0; i < (PetscInt)num_faces; i++) leaf[i].index = donor_indices[i];
276     PetscCall(PetscFree(donor_indices));
277     PetscInt pStart, pEnd;
278     PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
279     PetscCall(PetscSFSetGraph(face_sfs[num_directions], pEnd - pStart, num_faces, faces, PETSC_COPY_VALUES, leaf, PETSC_OWN_POINTER));
280     {
281       char face_sf_name[PETSC_MAX_PATH_LEN];
282       PetscCall(PetscSNPrintf(face_sf_name, sizeof face_sf_name, "Z-order Isoperiodic Faces #%" PetscInt_FMT, num_directions));
283       PetscCall(PetscObjectSetName((PetscObject)face_sfs[num_directions], face_sf_name));
284     }
285 
286     transforms[num_directions][0][0]         = 1;
287     transforms[num_directions][1][1]         = 1;
288     transforms[num_directions][2][2]         = 1;
289     transforms[num_directions][3][3]         = 1;
290     transforms[num_directions][direction][3] = upper[direction] - lower[direction];
291     num_directions++;
292   }
293 
294   PetscCall(DMPlexSetIsoperiodicFaceSF(dm, face_sfs[0]));
295   PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, (PetscScalar *)transforms));
296 
297   for (PetscInt i = 0; i < num_directions; i++) PetscCall(PetscSFDestroy(&face_sfs[i]));
298   PetscFunctionReturn(PETSC_SUCCESS);
299 }
300 
301 // This is a DMGlobalToLocalHook that applies the affine offsets. When extended for rotated periodicity, it'll need to
302 // apply a rotatonal transform and similar operations will be needed for fields (e.g., to rotate a velocity vector).
303 // We use this crude approach here so we don't have to write new GPU kernels yet.
304 static PetscErrorCode DMCoordAddPeriodicOffsets_Private(DM dm, Vec g, InsertMode mode, Vec l, void *ctx)
305 {
306   PetscFunctionBegin;
307   PetscCall(VecScatterBegin(dm->periodic.affine_to_local, dm->periodic.affine, l, ADD_VALUES, SCATTER_FORWARD));
308   PetscCall(VecScatterEnd(dm->periodic.affine_to_local, dm->periodic.affine, l, ADD_VALUES, SCATTER_FORWARD));
309   PetscFunctionReturn(PETSC_SUCCESS);
310 }
311 
312 // Start with an SF for a positive depth (e.g., faces) and create a new SF for matched closure. The caller must ensure
313 // that both the donor (root) face and the periodic (leaf) face have consistent orientation, meaning that their closures
314 // are isomorphic. It may be useful/necessary for this restriction to be loosened.
315 //
316 // Output Arguments:
317 //
318 // + closure_sf - augmented point SF (see `DMGetPointSF()`) that includes the faces and all points in its closure. This
319 //   can be used to create a global section and section SF.
320 // - is_points - index set for just the points in the closure of `face_sf`. These may be used to apply an affine
321 //   transformation to periodic dofs; see DMPeriodicCoordinateSetUp_Internal().
322 //
323 static PetscErrorCode DMPlexCreateIsoperiodicPointSF_Private(DM dm, PetscSF face_sf, PetscSF *closure_sf, IS *is_points)
324 {
325   MPI_Comm           comm;
326   PetscInt           nroots, nleaves, npoints;
327   const PetscInt    *filocal, *pilocal;
328   const PetscSFNode *firemote, *piremote;
329   PetscMPIInt        rank;
330   PetscSF            point_sf;
331 
332   PetscFunctionBegin;
333   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
334   PetscCallMPI(MPI_Comm_rank(comm, &rank));
335   PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, &firemote));
336   PetscCall(DMGetPointSF(dm, &point_sf)); // Point SF has remote points
337   PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, &pilocal, &piremote));
338   PetscInt *rootdata, *leafdata;
339   PetscCall(PetscCalloc2(2 * nroots, &rootdata, 2 * nroots, &leafdata));
340   for (PetscInt i = 0; i < nleaves; i++) {
341     PetscInt point = filocal[i], cl_size, *closure = NULL;
342     PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
343     leafdata[point] = cl_size - 1;
344     PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
345   }
346   PetscCall(PetscSFReduceBegin(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
347   PetscCall(PetscSFReduceEnd(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
348 
349   PetscInt root_offset = 0;
350   for (PetscInt p = 0; p < nroots; p++) {
351     const PetscInt *donor_dof = rootdata + nroots;
352     if (donor_dof[p] == 0) {
353       rootdata[2 * p]     = -1;
354       rootdata[2 * p + 1] = -1;
355       continue;
356     }
357     PetscInt  cl_size;
358     PetscInt *closure = NULL;
359     PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
360     // cl_size - 1 = points not including self
361     PetscAssert(donor_dof[p] == cl_size - 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Reduced leaf cone sizes do not match root cone sizes");
362     rootdata[2 * p]     = root_offset;
363     rootdata[2 * p + 1] = cl_size - 1;
364     root_offset += cl_size - 1;
365     PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
366   }
367   PetscCall(PetscSFBcastBegin(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
368   PetscCall(PetscSFBcastEnd(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
369   // Count how many leaves we need to communicate the closures
370   PetscInt leaf_offset = 0;
371   for (PetscInt i = 0; i < nleaves; i++) {
372     PetscInt point = filocal[i];
373     if (leafdata[2 * point + 1] < 0) continue;
374     leaf_offset += leafdata[2 * point + 1];
375   }
376 
377   PetscSFNode *closure_leaf;
378   PetscCall(PetscMalloc1(leaf_offset, &closure_leaf));
379   leaf_offset = 0;
380   for (PetscInt i = 0; i < nleaves; i++) {
381     PetscInt point   = filocal[i];
382     PetscInt cl_size = leafdata[2 * point + 1];
383     if (cl_size < 0) continue;
384     for (PetscInt j = 0; j < cl_size; j++) {
385       closure_leaf[leaf_offset].rank  = firemote[i].rank;
386       closure_leaf[leaf_offset].index = leafdata[2 * point] + j;
387       leaf_offset++;
388     }
389   }
390 
391   PetscSF sf_closure;
392   PetscCall(PetscSFCreate(comm, &sf_closure));
393   PetscCall(PetscSFSetGraph(sf_closure, root_offset, leaf_offset, NULL, PETSC_USE_POINTER, closure_leaf, PETSC_OWN_POINTER));
394 
395   // Pack root buffer with owner for every point in the root cones
396   PetscSFNode *donor_closure;
397   PetscCall(PetscCalloc1(root_offset, &donor_closure));
398   root_offset = 0;
399   for (PetscInt p = 0; p < nroots; p++) {
400     if (rootdata[2 * p] < 0) continue;
401     PetscInt  cl_size;
402     PetscInt *closure = NULL;
403     PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
404     for (PetscInt j = 1; j < cl_size; j++) {
405       PetscInt c = closure[2 * j];
406       if (pilocal) {
407         PetscInt found = -1;
408         if (npoints > 0) PetscCall(PetscFindInt(c, npoints, pilocal, &found));
409         if (found >= 0) {
410           donor_closure[root_offset++] = piremote[found];
411           continue;
412         }
413       }
414       // we own c
415       donor_closure[root_offset].rank  = rank;
416       donor_closure[root_offset].index = c;
417       root_offset++;
418     }
419     PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
420   }
421 
422   PetscSFNode *leaf_donor_closure;
423   PetscCall(PetscMalloc1(leaf_offset, &leaf_donor_closure));
424   PetscCall(PetscSFBcastBegin(sf_closure, MPIU_2INT, donor_closure, leaf_donor_closure, MPI_REPLACE));
425   PetscCall(PetscSFBcastEnd(sf_closure, MPIU_2INT, donor_closure, leaf_donor_closure, MPI_REPLACE));
426   PetscCall(PetscSFDestroy(&sf_closure));
427   PetscCall(PetscFree(donor_closure));
428 
429   PetscSFNode *new_iremote;
430   PetscCall(PetscCalloc1(nroots, &new_iremote));
431   for (PetscInt i = 0; i < nroots; i++) new_iremote[i].rank = -1;
432   // Walk leaves and match vertices
433   leaf_offset = 0;
434   for (PetscInt i = 0; i < nleaves; i++) {
435     PetscInt  point   = filocal[i], cl_size;
436     PetscInt *closure = NULL;
437     PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
438     for (PetscInt j = 1; j < cl_size; j++) { // TODO: should we send donor edge orientations so we can flip for consistency?
439       PetscInt    c  = closure[2 * j];
440       PetscSFNode lc = leaf_donor_closure[leaf_offset];
441       // printf("[%d] face %d.%d: %d ?-- (%d,%d)\n", rank, point, j, c, lc.rank, lc.index);
442       if (new_iremote[c].rank == -1) {
443         new_iremote[c] = lc;
444       } else PetscCheck(new_iremote[c].rank == lc.rank && new_iremote[c].index == lc.index, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Mismatched cone ordering between faces");
445       leaf_offset++;
446     }
447     PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
448   }
449   PetscCall(PetscFree(leaf_donor_closure));
450 
451   // Include face points in closure SF
452   for (PetscInt i = 0; i < nleaves; i++) new_iremote[filocal[i]] = firemote[i];
453   // consolidate leaves
454   PetscInt num_new_leaves = 0;
455   for (PetscInt i = 0; i < nroots; i++) {
456     if (new_iremote[i].rank == -1) continue;
457     new_iremote[num_new_leaves] = new_iremote[i];
458     leafdata[num_new_leaves]    = i;
459     num_new_leaves++;
460   }
461   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, num_new_leaves, leafdata, PETSC_COPY_VALUES, is_points));
462 
463   PetscSF csf;
464   PetscCall(PetscSFCreate(comm, &csf));
465   PetscCall(PetscSFSetGraph(csf, nroots, num_new_leaves, leafdata, PETSC_COPY_VALUES, new_iremote, PETSC_COPY_VALUES));
466   PetscCall(PetscFree(new_iremote)); // copy and delete because new_iremote is longer than it needs to be
467   PetscCall(PetscFree2(rootdata, leafdata));
468 
469   if (npoints < 0) { // empty point_sf
470     *closure_sf = csf;
471   } else {
472     PetscCall(PetscSFMerge(point_sf, csf, closure_sf));
473     PetscCall(PetscSFDestroy(&csf));
474   }
475   PetscCall(PetscObjectSetName((PetscObject)*closure_sf, "Composed Periodic Points"));
476   PetscFunctionReturn(PETSC_SUCCESS);
477 }
478 
479 static PetscErrorCode DMGetIsoperiodicPointSF_Plex(DM dm, PetscSF *sf)
480 {
481   DM_Plex *plex = (DM_Plex *)dm->data;
482 
483   PetscFunctionBegin;
484   if (!plex->periodic.composed_sf) {
485     PetscSF face_sf = plex->periodic.face_sf;
486 
487     PetscCall(DMPlexCreateIsoperiodicPointSF_Private(dm, face_sf, &plex->periodic.composed_sf, &plex->periodic.periodic_points));
488   }
489   if (sf) *sf = plex->periodic.composed_sf;
490   PetscFunctionReturn(PETSC_SUCCESS);
491 }
492 
493 PetscErrorCode DMPlexMigrateIsoperiodicFaceSF_Internal(DM old_dm, DM dm, PetscSF sf_migration)
494 {
495   DM_Plex    *plex = (DM_Plex *)old_dm->data;
496   PetscSF     sf_point;
497   PetscMPIInt rank;
498 
499   PetscFunctionBegin;
500   if (!plex->periodic.face_sf) PetscFunctionReturn(PETSC_SUCCESS);
501   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
502   PetscCall(DMGetPointSF(dm, &sf_point));
503   PetscInt           old_npoints, new_npoints, old_nleaf, new_nleaf, point_nleaf;
504   PetscSFNode       *new_leafdata, *rootdata, *leafdata;
505   const PetscInt    *old_local, *point_local;
506   const PetscSFNode *old_remote, *point_remote;
507   PetscCall(PetscSFGetGraph(plex->periodic.face_sf, &old_npoints, &old_nleaf, &old_local, &old_remote));
508   PetscCall(PetscSFGetGraph(sf_migration, NULL, &new_nleaf, NULL, NULL));
509   PetscCall(PetscSFGetGraph(sf_point, &new_npoints, &point_nleaf, &point_local, &point_remote));
510   PetscAssert(new_nleaf == new_npoints, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected migration leaf space to match new point root space");
511   PetscCall(PetscMalloc3(old_npoints, &rootdata, old_npoints, &leafdata, new_npoints, &new_leafdata));
512 
513   // Fill new_leafdata with new owners of all points
514   for (PetscInt i = 0; i < new_npoints; i++) {
515     new_leafdata[i].rank  = rank;
516     new_leafdata[i].index = i;
517   }
518   for (PetscInt i = 0; i < point_nleaf; i++) {
519     PetscInt j      = point_local[i];
520     new_leafdata[j] = point_remote[i];
521   }
522   // REPLACE is okay because every leaf agrees about the new owners
523   PetscCall(PetscSFReduceBegin(sf_migration, MPIU_2INT, new_leafdata, rootdata, MPI_REPLACE));
524   PetscCall(PetscSFReduceEnd(sf_migration, MPIU_2INT, new_leafdata, rootdata, MPI_REPLACE));
525   // rootdata now contains the new owners
526 
527   // Send to leaves of old space
528   for (PetscInt i = 0; i < old_npoints; i++) {
529     leafdata[i].rank  = -1;
530     leafdata[i].index = -1;
531   }
532   PetscCall(PetscSFBcastBegin(plex->periodic.face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
533   PetscCall(PetscSFBcastEnd(plex->periodic.face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
534 
535   // Send to new leaf space
536   PetscCall(PetscSFBcastBegin(sf_migration, MPIU_2INT, leafdata, new_leafdata, MPI_REPLACE));
537   PetscCall(PetscSFBcastEnd(sf_migration, MPIU_2INT, leafdata, new_leafdata, MPI_REPLACE));
538 
539   PetscInt     nface = 0, *new_local;
540   PetscSFNode *new_remote;
541   for (PetscInt i = 0; i < new_npoints; i++) nface += (new_leafdata[i].rank >= 0);
542   PetscCall(PetscMalloc1(nface, &new_local));
543   PetscCall(PetscMalloc1(nface, &new_remote));
544   nface = 0;
545   for (PetscInt i = 0; i < new_npoints; i++) {
546     if (new_leafdata[i].rank == -1) continue;
547     new_local[nface]  = i;
548     new_remote[nface] = new_leafdata[i];
549     nface++;
550   }
551   PetscCall(PetscFree3(rootdata, leafdata, new_leafdata));
552   PetscSF sf_face;
553   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &sf_face));
554   PetscCall(PetscSFSetGraph(sf_face, new_npoints, nface, new_local, PETSC_OWN_POINTER, new_remote, PETSC_OWN_POINTER));
555   PetscCall(PetscObjectSetName((PetscObject)sf_face, "Migrated Isoperiodic Faces"));
556   PetscCall(DMPlexSetIsoperiodicFaceSF(dm, sf_face));
557   PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, &plex->periodic.transform[0][0]));
558   PetscCall(PetscSFDestroy(&sf_face));
559   PetscFunctionReturn(PETSC_SUCCESS);
560 }
561 
562 PetscErrorCode DMPeriodicCoordinateSetUp_Internal(DM dm)
563 {
564   DM_Plex *plex = (DM_Plex *)dm->data;
565 
566   PetscFunctionBegin;
567   if (!plex->periodic.face_sf) PetscFunctionReturn(PETSC_SUCCESS);
568   PetscCall(DMGetIsoperiodicPointSF_Plex(dm, NULL));
569   PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex));
570 
571   PetscInt dim;
572   PetscCall(DMGetDimension(dm, &dim));
573   size_t count;
574   IS     isdof;
575   {
576     PetscInt        npoints;
577     const PetscInt *points;
578     IS              is = plex->periodic.periodic_points;
579     PetscSegBuffer  seg;
580     PetscSection    section;
581     PetscCall(DMGetLocalSection(dm, &section));
582     PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 32, &seg));
583     PetscCall(ISGetSize(is, &npoints));
584     PetscCall(ISGetIndices(is, &points));
585     for (PetscInt i = 0; i < npoints; i++) {
586       PetscInt point = points[i], off, dof;
587       PetscCall(PetscSectionGetOffset(section, point, &off));
588       PetscCall(PetscSectionGetDof(section, point, &dof));
589       PetscAssert(dof % dim == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected dof %" PetscInt_FMT " not divisible by dimension %" PetscInt_FMT, dof, dim);
590       for (PetscInt j = 0; j < dof / dim; j++) {
591         PetscInt *slot;
592         PetscCall(PetscSegBufferGetInts(seg, 1, &slot));
593         *slot = off / dim + j;
594       }
595     }
596     PetscInt *ind;
597     PetscCall(PetscSegBufferGetSize(seg, &count));
598     PetscCall(PetscSegBufferExtractAlloc(seg, &ind));
599     PetscCall(PetscSegBufferDestroy(&seg));
600     PetscCall(ISCreateBlock(PETSC_COMM_SELF, dim, count, ind, PETSC_OWN_POINTER, &isdof));
601   }
602   Vec        L, P;
603   VecType    vec_type;
604   VecScatter scatter;
605   PetscCall(DMGetLocalVector(dm, &L));
606   PetscCall(VecCreate(PETSC_COMM_SELF, &P));
607   PetscCall(VecSetSizes(P, count * dim, count * dim));
608   PetscCall(VecGetType(L, &vec_type));
609   PetscCall(VecSetType(P, vec_type));
610   PetscCall(VecScatterCreate(P, NULL, L, isdof, &scatter));
611   PetscCall(DMRestoreLocalVector(dm, &L));
612   PetscCall(ISDestroy(&isdof));
613 
614   {
615     PetscScalar *x;
616     PetscCall(VecGetArrayWrite(P, &x));
617     for (PetscInt i = 0; i < (PetscInt)count; i++) {
618       for (PetscInt j = 0; j < dim; j++) x[i * dim + j] = plex->periodic.transform[j][3];
619     }
620     PetscCall(VecRestoreArrayWrite(P, &x));
621   }
622 
623   dm->periodic.affine_to_local = scatter;
624   dm->periodic.affine          = P;
625   PetscCall(DMGlobalToLocalHookAdd(dm, NULL, DMCoordAddPeriodicOffsets_Private, NULL));
626   PetscFunctionReturn(PETSC_SUCCESS);
627 }
628 
629 // We'll just orient all the edges, though only periodic boundary edges need orientation
630 static PetscErrorCode DMPlexOrientPositiveEdges_Private(DM dm)
631 {
632   PetscInt dim, eStart, eEnd;
633 
634   PetscFunctionBegin;
635   PetscCall(DMGetDimension(dm, &dim));
636   if (dim < 3) PetscFunctionReturn(PETSC_SUCCESS); // not necessary
637   PetscCall(DMPlexGetDepthStratum(dm, 1, &eStart, &eEnd));
638   for (PetscInt e = eStart; e < eEnd; e++) {
639     const PetscInt *cone;
640     PetscCall(DMPlexGetCone(dm, e, &cone));
641     if (cone[0] > cone[1]) PetscCall(DMPlexOrientPoint(dm, e, -1));
642   }
643   PetscFunctionReturn(PETSC_SUCCESS);
644 }
645 
646 PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Internal(DM dm, PetscInt dim, const PetscInt faces[], const PetscReal lower[], const PetscReal upper[], const DMBoundaryType periodicity[], PetscBool interpolate)
647 {
648   PetscInt  eextent[3] = {1, 1, 1}, vextent[3] = {1, 1, 1};
649   const Ijk closure_1[] = {
650     {0, 0, 0},
651     {1, 0, 0},
652   };
653   const Ijk closure_2[] = {
654     {0, 0, 0},
655     {1, 0, 0},
656     {1, 1, 0},
657     {0, 1, 0},
658   };
659   const Ijk closure_3[] = {
660     {0, 0, 0},
661     {0, 1, 0},
662     {1, 1, 0},
663     {1, 0, 0},
664     {0, 0, 1},
665     {1, 0, 1},
666     {1, 1, 1},
667     {0, 1, 1},
668   };
669   const Ijk *const closure_dim[] = {NULL, closure_1, closure_2, closure_3};
670   // This must be kept consistent with DMPlexCreateCubeMesh_Internal
671   const PetscInt        face_marker_1[]   = {1, 2};
672   const PetscInt        face_marker_2[]   = {4, 2, 1, 3};
673   const PetscInt        face_marker_3[]   = {6, 5, 3, 4, 1, 2};
674   const PetscInt *const face_marker_dim[] = {NULL, face_marker_1, face_marker_2, face_marker_3};
675   // Orient faces so the normal is in the positive axis and the first vertex is the one closest to zero.
676   // These orientations can be determined by examining cones of a reference quad and hex element.
677   const PetscInt        face_orient_1[]   = {0, 0};
678   const PetscInt        face_orient_2[]   = {-1, 0, 0, -1};
679   const PetscInt        face_orient_3[]   = {-2, 0, -2, 1, -2, 0};
680   const PetscInt *const face_orient_dim[] = {NULL, face_orient_1, face_orient_2, face_orient_3};
681 
682   PetscFunctionBegin;
683   PetscAssertPointer(dm, 1);
684   PetscValidLogicalCollectiveInt(dm, dim, 2);
685   PetscCall(DMSetDimension(dm, dim));
686   PetscMPIInt rank, size;
687   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size));
688   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
689   for (PetscInt i = 0; i < dim; i++) {
690     eextent[i] = faces[i];
691     vextent[i] = faces[i] + 1;
692   }
693   ZLayout layout;
694   PetscCall(ZLayoutCreate(size, eextent, vextent, &layout));
695   PetscZSet vset; // set of all vertices in the closure of the owned elements
696   PetscCall(PetscZSetCreate(&vset));
697   PetscInt local_elems = 0;
698   for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
699     Ijk loc = ZCodeSplit(z);
700     if (IjkActive(layout.vextent, loc)) PetscCall(PetscZSetAdd(vset, z));
701     else {
702       z += ZStepOct(z);
703       continue;
704     }
705     if (IjkActive(layout.eextent, loc)) {
706       local_elems++;
707       // Add all neighboring vertices to set
708       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
709         Ijk   inc  = closure_dim[dim][n];
710         Ijk   nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
711         ZCode v    = ZEncode(nloc);
712         PetscCall(PetscZSetAdd(vset, v));
713       }
714     }
715   }
716   PetscInt local_verts, off = 0;
717   ZCode   *vert_z;
718   PetscCall(PetscZSetGetSize(vset, &local_verts));
719   PetscCall(PetscMalloc1(local_verts, &vert_z));
720   PetscCall(PetscZSetGetElems(vset, &off, vert_z));
721   PetscCall(PetscZSetDestroy(&vset));
722   // ZCode is unsigned for bitwise convenience, but highest bit should never be set, so can interpret as signed
723   PetscCall(PetscSortInt64(local_verts, (PetscInt64 *)vert_z));
724 
725   PetscCall(DMPlexSetChart(dm, 0, local_elems + local_verts));
726   for (PetscInt e = 0; e < local_elems; e++) PetscCall(DMPlexSetConeSize(dm, e, PetscPowInt(2, dim)));
727   PetscCall(DMSetUp(dm));
728   {
729     PetscInt e = 0;
730     for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
731       Ijk loc = ZCodeSplit(z);
732       if (!IjkActive(layout.eextent, loc)) {
733         z += ZStepOct(z);
734         continue;
735       }
736       PetscInt cone[8], orient[8] = {0};
737       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
738         Ijk      inc  = closure_dim[dim][n];
739         Ijk      nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
740         ZCode    v    = ZEncode(nloc);
741         PetscInt ci   = ZCodeFind(v, local_verts, vert_z);
742         PetscAssert(ci >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Did not find neighbor vertex in set");
743         cone[n] = local_elems + ci;
744       }
745       PetscCall(DMPlexSetCone(dm, e, cone));
746       PetscCall(DMPlexSetConeOrientation(dm, e, orient));
747       e++;
748     }
749   }
750 
751   PetscCall(DMPlexSymmetrize(dm));
752   PetscCall(DMPlexStratify(dm));
753 
754   { // Create point SF
755     PetscSF sf;
756     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &sf));
757     PetscInt owned_verts = ZCodeFind(layout.zstarts[rank + 1], local_verts, vert_z);
758     if (owned_verts < 0) owned_verts = -(owned_verts + 1); // We don't care whether the key was found
759     PetscInt     num_ghosts = local_verts - owned_verts;   // Due to sorting, owned vertices always come first
760     PetscInt    *local_ghosts;
761     PetscSFNode *ghosts;
762     PetscCall(PetscMalloc1(num_ghosts, &local_ghosts));
763     PetscCall(PetscMalloc1(num_ghosts, &ghosts));
764     for (PetscInt i = 0; i < num_ghosts;) {
765       ZCode    z           = vert_z[owned_verts + i];
766       PetscInt remote_rank = ZCodeFind(z, size + 1, layout.zstarts), remote_count = 0;
767       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
768       // We have a new remote rank; find all the ghost indices (which are contiguous in vert_z)
769 
770       // Count the elements on remote_rank
771       PetscInt remote_elem = ZLayoutElementsOnRank(&layout, remote_rank);
772 
773       // Traverse vertices and make ghost links
774       for (ZCode rz = layout.zstarts[remote_rank]; rz < layout.zstarts[remote_rank + 1]; rz++) {
775         Ijk loc = ZCodeSplit(rz);
776         if (rz == z) {
777           local_ghosts[i] = local_elems + owned_verts + i;
778           ghosts[i].rank  = remote_rank;
779           ghosts[i].index = remote_elem + remote_count;
780           i++;
781           if (i == num_ghosts) break;
782           z = vert_z[owned_verts + i];
783         }
784         if (IjkActive(layout.vextent, loc)) remote_count++;
785         else rz += ZStepOct(rz);
786       }
787     }
788     PetscCall(PetscSFSetGraph(sf, local_elems + local_verts, num_ghosts, local_ghosts, PETSC_OWN_POINTER, ghosts, PETSC_OWN_POINTER));
789     PetscCall(PetscObjectSetName((PetscObject)sf, "SFC Point SF"));
790     PetscCall(DMSetPointSF(dm, sf));
791     PetscCall(PetscSFDestroy(&sf));
792   }
793   {
794     Vec          coordinates;
795     PetscScalar *coords;
796     PetscSection coord_section;
797     PetscInt     coord_size;
798     PetscCall(DMGetCoordinateSection(dm, &coord_section));
799     PetscCall(PetscSectionSetNumFields(coord_section, 1));
800     PetscCall(PetscSectionSetFieldComponents(coord_section, 0, dim));
801     PetscCall(PetscSectionSetChart(coord_section, local_elems, local_elems + local_verts));
802     for (PetscInt v = 0; v < local_verts; v++) {
803       PetscInt point = local_elems + v;
804       PetscCall(PetscSectionSetDof(coord_section, point, dim));
805       PetscCall(PetscSectionSetFieldDof(coord_section, point, 0, dim));
806     }
807     PetscCall(PetscSectionSetUp(coord_section));
808     PetscCall(PetscSectionGetStorageSize(coord_section, &coord_size));
809     PetscCall(VecCreate(PETSC_COMM_SELF, &coordinates));
810     PetscCall(PetscObjectSetName((PetscObject)coordinates, "coordinates"));
811     PetscCall(VecSetSizes(coordinates, coord_size, PETSC_DETERMINE));
812     PetscCall(VecSetBlockSize(coordinates, dim));
813     PetscCall(VecSetType(coordinates, VECSTANDARD));
814     PetscCall(VecGetArray(coordinates, &coords));
815     for (PetscInt v = 0; v < local_verts; v++) {
816       Ijk loc             = ZCodeSplit(vert_z[v]);
817       coords[v * dim + 0] = lower[0] + loc.i * (upper[0] - lower[0]) / layout.eextent.i;
818       if (dim > 1) coords[v * dim + 1] = lower[1] + loc.j * (upper[1] - lower[1]) / layout.eextent.j;
819       if (dim > 2) coords[v * dim + 2] = lower[2] + loc.k * (upper[2] - lower[2]) / layout.eextent.k;
820     }
821     PetscCall(VecRestoreArray(coordinates, &coords));
822     PetscCall(DMSetCoordinatesLocal(dm, coordinates));
823     PetscCall(VecDestroy(&coordinates));
824   }
825   if (interpolate) {
826     PetscCall(DMPlexInterpolateInPlace_Internal(dm));
827     // It's currently necessary to orient the donor and periodic edges consistently. An easy way to ensure that is ot
828     // give all edges positive orientation. Since vertices are created in Z-order, all ranks will agree about the
829     // ordering cone[0] < cone[1]. This is overkill and it would be nice to remove this preparation and make
830     // DMPlexCreateIsoperiodicClosureSF_Private() more resilient, so it fixes any inconsistent orientations. That might
831     // be needed in a general CGNS reader, for example.
832     PetscCall(DMPlexOrientPositiveEdges_Private(dm));
833 
834     DMLabel label;
835     PetscCall(DMCreateLabel(dm, "Face Sets"));
836     PetscCall(DMGetLabel(dm, "Face Sets", &label));
837     PetscSegBuffer per_faces[3], donor_face_closure[3], my_donor_faces[3];
838     for (PetscInt i = 0; i < 3; i++) {
839       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &per_faces[i]));
840       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &my_donor_faces[i]));
841       PetscCall(PetscSegBufferCreate(sizeof(ZCode), 64 * PetscPowInt(2, dim), &donor_face_closure[i]));
842     }
843     PetscInt fStart, fEnd, vStart, vEnd;
844     PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
845     PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
846     for (PetscInt f = fStart; f < fEnd; f++) {
847       PetscInt npoints, *points = NULL, num_fverts = 0, fverts[8];
848       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
849       PetscInt bc_count[6] = {0};
850       for (PetscInt i = 0; i < npoints; i++) {
851         PetscInt p = points[2 * i];
852         if (p < vStart || vEnd <= p) continue;
853         fverts[num_fverts++] = p;
854         Ijk loc              = ZCodeSplit(vert_z[p - vStart]);
855         // Convention here matches DMPlexCreateCubeMesh_Internal
856         bc_count[0] += loc.i == 0;
857         bc_count[1] += loc.i == layout.vextent.i - 1;
858         bc_count[2] += loc.j == 0;
859         bc_count[3] += loc.j == layout.vextent.j - 1;
860         bc_count[4] += loc.k == 0;
861         bc_count[5] += loc.k == layout.vextent.k - 1;
862       }
863       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
864       for (PetscInt bc = 0, bc_match = 0; bc < 2 * dim; bc++) {
865         if (bc_count[bc] == PetscPowInt(2, dim - 1)) {
866           PetscCall(DMPlexOrientPoint(dm, f, face_orient_dim[dim][bc]));
867           if (periodicity[bc / 2] == DM_BOUNDARY_PERIODIC) {
868             PetscInt *put;
869             if (bc % 2 == 0) { // donor face; no label
870               PetscCall(PetscSegBufferGet(my_donor_faces[bc / 2], 1, &put));
871               *put = f;
872             } else { // periodic face
873               PetscCall(PetscSegBufferGet(per_faces[bc / 2], 1, &put));
874               *put = f;
875               ZCode *zput;
876               PetscCall(PetscSegBufferGet(donor_face_closure[bc / 2], num_fverts, &zput));
877               for (PetscInt i = 0; i < num_fverts; i++) {
878                 Ijk loc = ZCodeSplit(vert_z[fverts[i] - vStart]);
879                 switch (bc / 2) {
880                 case 0:
881                   loc.i = 0;
882                   break;
883                 case 1:
884                   loc.j = 0;
885                   break;
886                 case 2:
887                   loc.k = 0;
888                   break;
889                 }
890                 *zput++ = ZEncode(loc);
891               }
892             }
893             continue;
894           }
895           PetscAssert(bc_match == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Face matches multiple face sets");
896           PetscCall(DMLabelSetValue(label, f, face_marker_dim[dim][bc]));
897           bc_match++;
898         }
899       }
900     }
901     // Ensure that the Coordinate DM has our new boundary labels
902     DM cdm;
903     PetscCall(DMGetCoordinateDM(dm, &cdm));
904     PetscCall(DMCopyLabels(dm, cdm, PETSC_COPY_VALUES, PETSC_FALSE, DM_COPY_LABELS_FAIL));
905     if (periodicity[0] == DM_BOUNDARY_PERIODIC || (dim > 1 && periodicity[1] == DM_BOUNDARY_PERIODIC) || (dim > 2 && periodicity[2] == DM_BOUNDARY_PERIODIC)) {
906       PetscCall(DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(dm, &layout, vert_z, per_faces, lower, upper, periodicity, donor_face_closure, my_donor_faces));
907     }
908     for (PetscInt i = 0; i < 3; i++) {
909       PetscCall(PetscSegBufferDestroy(&per_faces[i]));
910       PetscCall(PetscSegBufferDestroy(&donor_face_closure[i]));
911       PetscCall(PetscSegBufferDestroy(&my_donor_faces[i]));
912     }
913   }
914   PetscCall(PetscFree(layout.zstarts));
915   PetscCall(PetscFree(vert_z));
916   PetscFunctionReturn(PETSC_SUCCESS);
917 }
918 
919 /*@
920   DMPlexSetIsoperiodicFaceSF - Express periodicity from an existing mesh
921 
922   Logically Collective
923 
924   Input Parameters:
925 + dm      - The `DMPLEX` on which to set periodicity
926 - face_sf - `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.
927 
928   Level: advanced
929 
930   Note:
931   One can use `-dm_plex_shape zbox` to use this mode of periodicity, wherein the periodic points are distinct both globally
932   and locally, but are paired when creating a global dof space.
933 
934 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexGetIsoperiodicFaceSF()`
935 @*/
936 PetscErrorCode DMPlexSetIsoperiodicFaceSF(DM dm, PetscSF face_sf)
937 {
938   DM_Plex *plex = (DM_Plex *)dm->data;
939 
940   PetscFunctionBegin;
941   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
942   PetscCall(PetscObjectReference((PetscObject)face_sf));
943   PetscCall(PetscSFDestroy(&plex->periodic.face_sf));
944   plex->periodic.face_sf = face_sf;
945   if (face_sf) PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex));
946 
947   DM cdm = dm->coordinates[0].dm; // Can't DMGetCoordinateDM because it automatically creates one
948   if (cdm) {
949     PetscCall(DMPlexSetIsoperiodicFaceSF(cdm, face_sf));
950     if (face_sf) cdm->periodic.setup = DMPeriodicCoordinateSetUp_Internal;
951   }
952   PetscFunctionReturn(PETSC_SUCCESS);
953 }
954 
955 /*@
956   DMPlexGetIsoperiodicFaceSF - Obtain periodicity for a mesh
957 
958   Logically Collective
959 
960   Input Parameter:
961 . dm - The `DMPLEX` for which to obtain periodic relation
962 
963   Output Parameter:
964 . face_sf - `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.
965 
966   Level: advanced
967 
968 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()`
969 @*/
970 PetscErrorCode DMPlexGetIsoperiodicFaceSF(DM dm, PetscSF *face_sf)
971 {
972   DM_Plex *plex = (DM_Plex *)dm->data;
973 
974   PetscFunctionBegin;
975   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
976   *face_sf = plex->periodic.face_sf;
977   PetscFunctionReturn(PETSC_SUCCESS);
978 }
979 
980 /*@C
981   DMPlexSetIsoperiodicFaceTransform - set geometric transform from donor to periodic points
982 
983   Logically Collective
984 
985   Input Parameters:
986 + dm - `DMPLEX` that has been configured with `DMPlexSetIsoperiodicFaceSF()`
987 - t  - 4x4 affine transformation basis.
988 
989   Level: advanced
990 
991   Notes:
992   Affine transforms are 4x4 matrices in which the leading 3x3 block expresses a rotation (or identity for no rotation),
993   the last column contains a translation vector, and the bottom row is all zero except the last entry, which must always
994   be 1. This representation is common in geometric modeling and allows affine transformations to be composed using
995   simple matrix multiplication.
996 
997   Although the interface accepts a general affine transform, only affine translation is supported at present.
998 
999   Developer Notes:
1000   This interface should be replaced by making BasisTransform public, expanding it to support affine representations, and
1001   adding GPU implementations to apply the G2L/L2G transforms.
1002 
1003 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()`
1004 @*/
1005 PetscErrorCode DMPlexSetIsoperiodicFaceTransform(DM dm, const PetscScalar t[])
1006 {
1007   DM_Plex *plex = (DM_Plex *)dm->data;
1008 
1009   PetscFunctionBegin;
1010   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1011   for (PetscInt i = 0; i < 4; i++) {
1012     for (PetscInt j = 0; j < 4; j++) {
1013       PetscCheck(i != j || t[i * 4 + j] == 1., PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Rotated transforms not supported");
1014       plex->periodic.transform[i][j] = t[i * 4 + j];
1015     }
1016   }
1017   PetscFunctionReturn(PETSC_SUCCESS);
1018 }
1019