xref: /petsc/src/dm/impls/plex/plexsfc.c (revision b83f62b02c23980c157021f4bce0d176a7e34e97)
1 #include <petsc/private/dmpleximpl.h> /*I   "petscdmplex.h"   I*/
2 #include <petscsf.h>
3 #include <petsc/private/hashset.h>
4 
5 typedef uint64_t ZCode;
6 
7 PETSC_HASH_SET(ZSet, ZCode, PetscHash_UInt64, PetscHashEqual)
8 
9 typedef struct {
10   PetscInt i, j, k;
11 } Ijk;
12 
13 typedef struct {
14   Ijk         eextent;
15   Ijk         vextent;
16   PetscMPIInt comm_size;
17   ZCode      *zstarts;
18 } ZLayout;
19 
20 static unsigned ZCodeSplit1(ZCode z)
21 {
22   z = ((z & 01001001001001001) | ((z >> 2) & 02002002002002002) | ((z >> 4) & 04004004004004004));
23   z = (z | (z >> 6) | (z >> 12)) & 0000000777000000777;
24   z = (z | (z >> 18)) & 0777777;
25   return (unsigned)z;
26 }
27 
28 static ZCode ZEncode1(unsigned t)
29 {
30   ZCode z = t;
31   z       = (z | (z << 18)) & 0777000000777;
32   z       = (z | (z << 6) | (z << 12)) & 07007007007007007;
33   z       = (z | (z << 2) | (z << 4)) & 0111111111111111111;
34   return z;
35 }
36 
37 static Ijk ZCodeSplit(ZCode z)
38 {
39   Ijk c;
40   c.i = ZCodeSplit1(z >> 2);
41   c.j = ZCodeSplit1(z >> 1);
42   c.k = ZCodeSplit1(z >> 0);
43   return c;
44 }
45 
46 static ZCode ZEncode(Ijk c)
47 {
48   ZCode z = (ZEncode1(c.i) << 2) | (ZEncode1(c.j) << 1) | ZEncode1(c.k);
49   return z;
50 }
51 
52 static PetscBool IjkActive(Ijk extent, Ijk l)
53 {
54   if (l.i < extent.i && l.j < extent.j && l.k < extent.k) return PETSC_TRUE;
55   return PETSC_FALSE;
56 }
57 
58 // If z is not the base of an octet (last 3 bits 0), return 0.
59 //
60 // If z is the base of an octet, we recursively grow to the biggest structured octet. This is typically useful when a z
61 // is outside the domain and we wish to skip a (possibly recursively large) octet to find our next interesting point.
62 static ZCode ZStepOct(ZCode z)
63 {
64   if (PetscUnlikely(z == 0)) return 0; // Infinite loop below if z == 0
65   ZCode step = 07;
66   for (; (z & step) == 0; step = (step << 3) | 07) { }
67   return step >> 3;
68 }
69 
70 // Since element/vertex box extents are typically not equal powers of 2, Z codes that lie within the domain are not contiguous.
71 static PetscErrorCode ZLayoutCreate(PetscMPIInt size, const PetscInt eextent[3], const PetscInt vextent[3], ZLayout *layout)
72 {
73   PetscFunctionBegin;
74   layout->eextent.i = eextent[0];
75   layout->eextent.j = eextent[1];
76   layout->eextent.k = eextent[2];
77   layout->vextent.i = vextent[0];
78   layout->vextent.j = vextent[1];
79   layout->vextent.k = vextent[2];
80   layout->comm_size = size;
81   layout->zstarts   = NULL;
82   PetscCall(PetscMalloc1(size + 1, &layout->zstarts));
83 
84   PetscInt total_elems = eextent[0] * eextent[1] * eextent[2];
85   ZCode    z           = 0;
86   layout->zstarts[0]   = 0;
87   // This loop traverses all vertices in the global domain, so is worth making fast. We use ZStepBound
88   for (PetscMPIInt r = 0; r < size; r++) {
89     PetscInt elems_needed = (total_elems / size) + (total_elems % size > r), count;
90     for (count = 0; count < elems_needed; z++) {
91       ZCode skip = ZStepOct(z); // optimistically attempt a longer step
92       for (ZCode s = skip;; s >>= 3) {
93         Ijk trial = ZCodeSplit(z + s);
94         if (IjkActive(layout->eextent, trial)) {
95           while (count + s + 1 > (ZCode)elems_needed) s >>= 3; // Shrink the octet
96           count += s + 1;
97           z += s;
98           break;
99         }
100         if (s == 0) { // the whole skip octet is inactive
101           z += skip;
102           break;
103         }
104       }
105     }
106     // Pick up any extra vertices in the Z ordering before the next rank's first owned element.
107     //
108     // This leads to poorly balanced vertices when eextent is a power of 2, since all the fringe vertices end up
109     // on the last rank. A possible solution is to balance the Z-order vertices independently from the cells, which will
110     // result in a lot of element closures being remote. We could finish marking boundary conditions, then do a round of
111     // vertex ownership smoothing (which would reorder and redistribute vertices without touching element distribution).
112     // Another would be to have an analytic ownership criteria for vertices in the fringe veextent - eextent. This would
113     // complicate the job of identifying an owner and its offset.
114     //
115     // The current recommended approach is to let `-dm_distribute 1` (default) resolve vertex ownership. This is
116     // *mandatory* with isoperiodicity (except in special cases) to remove standed vertices from local spaces. Here's
117     // the issue:
118     //
119     // Consider this partition on rank 0 (left) and rank 1.
120     //
121     //    4 --------  5 -- 14 --10 -- 21 --11
122     //                |          |          |
123     // 7 -- 16 --  8  |          |          |
124     // |           |  3 -------  7 -------  9
125     // |           |             |          |
126     // 4 --------  6 ------ 10   |          |
127     // |           |         |   6 -- 16 -- 8
128     // |           |         |
129     // 3 ---11---  5 --18--  9
130     //
131     // The periodic face SF looks like
132     // [0] Number of roots=21, leaves=1, remote ranks=1
133     // [0] 16 <- (0,11)
134     // [1] Number of roots=22, leaves=2, remote ranks=2
135     // [1] 14 <- (0,18)
136     // [1] 21 <- (1,16)
137     //
138     // In handling face (0,16), rank 0 learns that (0,7) and (0,8) map to (0,3) and (0,5) respectively, thus we won't use
139     // the point SF links to (1,4) and (1,5). Rank 1 learns about the periodic mapping of (1,5) while handling face
140     // (1,14), but never learns that vertex (1,4) has been mapped to (0,3) by face (0,16).
141     //
142     // We can relatively easily inform vertex (1,4) of this mapping, but it stays in rank 1's local space despite not
143     // being in the closure and thus not being contributed to. This would be mostly harmless except that some viewer
144     // routines expect all local points to be somehow significant. It is not easy to analytically remove the (1,4)
145     // vertex because the point SF and isoperiodic face SF would need to be updated to account for removal of the
146     // stranded vertices.
147     for (; z <= ZEncode(layout->vextent); z++) {
148       Ijk loc = ZCodeSplit(z);
149       if (IjkActive(layout->eextent, loc)) break;
150       z += ZStepOct(z);
151     }
152     layout->zstarts[r + 1] = z;
153   }
154   layout->zstarts[size] = ZEncode(layout->vextent);
155   PetscFunctionReturn(PETSC_SUCCESS);
156 }
157 
158 static PetscInt ZLayoutElementsOnRank(const ZLayout *layout, PetscMPIInt rank)
159 {
160   PetscInt remote_elem = 0;
161   for (ZCode rz = layout->zstarts[rank]; rz < layout->zstarts[rank + 1]; rz++) {
162     Ijk loc = ZCodeSplit(rz);
163     if (IjkActive(layout->eextent, loc)) remote_elem++;
164     else rz += ZStepOct(rz);
165   }
166   return remote_elem;
167 }
168 
169 static PetscInt ZCodeFind(ZCode key, PetscInt n, const ZCode X[])
170 {
171   PetscInt lo = 0, hi = n;
172 
173   if (n == 0) return -1;
174   while (hi - lo > 1) {
175     PetscInt mid = lo + (hi - lo) / 2;
176     if (key < X[mid]) hi = mid;
177     else lo = mid;
178   }
179   return key == X[lo] ? lo : -(lo + (key > X[lo]) + 1);
180 }
181 
182 static PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(DM dm, const ZLayout *layout, const ZCode *vert_z, PetscSegBuffer per_faces[3], const PetscReal *lower, const PetscReal *upper, const DMBoundaryType *periodicity, PetscSegBuffer donor_face_closure[3], PetscSegBuffer my_donor_faces[3])
183 {
184   MPI_Comm    comm;
185   PetscInt    dim, vStart, vEnd;
186   PetscMPIInt size;
187   PetscSF     face_sfs[3];
188   PetscScalar transforms[3][4][4] = {{{0}}};
189 
190   PetscFunctionBegin;
191   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
192   PetscCallMPI(MPI_Comm_size(comm, &size));
193   PetscCall(DMGetDimension(dm, &dim));
194   const PetscInt csize = PetscPowInt(2, dim - 1);
195   PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
196 
197   PetscInt num_directions = 0;
198   for (PetscInt direction = 0; direction < dim; direction++) {
199     size_t       num_faces;
200     PetscInt    *faces;
201     ZCode       *donor_verts, *donor_minz;
202     PetscSFNode *leaf;
203 
204     if (periodicity[direction] != DM_BOUNDARY_PERIODIC) continue;
205     PetscCall(PetscSegBufferGetSize(per_faces[direction], &num_faces));
206     PetscCall(PetscSegBufferExtractInPlace(per_faces[direction], &faces));
207     PetscCall(PetscSegBufferExtractInPlace(donor_face_closure[direction], &donor_verts));
208     PetscCall(PetscMalloc1(num_faces, &donor_minz));
209     PetscCall(PetscMalloc1(num_faces, &leaf));
210     for (PetscInt i = 0; i < (PetscInt)num_faces; i++) {
211       ZCode minz = donor_verts[i * csize];
212       for (PetscInt j = 1; j < csize; j++) minz = PetscMin(minz, donor_verts[i * csize + j]);
213       donor_minz[i] = minz;
214     }
215     {
216       PetscBool sorted;
217       PetscCall(PetscSortedInt64(num_faces, (const PetscInt64 *)donor_minz, &sorted));
218       // If a donor vertex were chosen to broker multiple faces, we would have a logic error.
219       // Checking for sorting is a cheap check that there are no duplicates.
220       PetscCheck(sorted, PETSC_COMM_SELF, PETSC_ERR_PLIB, "minz not sorted; possible duplicates not checked");
221     }
222     for (PetscInt i = 0; i < (PetscInt)num_faces;) {
223       ZCode    z           = donor_minz[i];
224       PetscInt remote_rank = ZCodeFind(z, size + 1, layout->zstarts), remote_count = 0;
225       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
226       // Process all the vertices on this rank
227       for (ZCode rz = layout->zstarts[remote_rank]; rz < layout->zstarts[remote_rank + 1]; rz++) {
228         Ijk loc = ZCodeSplit(rz);
229         if (rz == z) {
230           leaf[i].rank  = remote_rank;
231           leaf[i].index = remote_count;
232           i++;
233           if (i == (PetscInt)num_faces) break;
234           z = donor_minz[i];
235         }
236         if (IjkActive(layout->vextent, loc)) remote_count++;
237       }
238     }
239     PetscCall(PetscFree(donor_minz));
240     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &face_sfs[num_directions]));
241     PetscCall(PetscSFSetGraph(face_sfs[num_directions], vEnd - vStart, num_faces, NULL, PETSC_USE_POINTER, leaf, PETSC_USE_POINTER));
242     const PetscInt *my_donor_degree;
243     PetscCall(PetscSFComputeDegreeBegin(face_sfs[num_directions], &my_donor_degree));
244     PetscCall(PetscSFComputeDegreeEnd(face_sfs[num_directions], &my_donor_degree));
245     PetscInt num_multiroots = 0;
246     for (PetscInt i = 0; i < vEnd - vStart; i++) {
247       num_multiroots += my_donor_degree[i];
248       if (my_donor_degree[i] == 0) continue;
249       PetscAssert(my_donor_degree[i] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex has multiple faces");
250     }
251     PetscInt *my_donors, *donor_indices, *my_donor_indices;
252     size_t    num_my_donors;
253     PetscCall(PetscSegBufferGetSize(my_donor_faces[direction], &num_my_donors));
254     PetscCheck((PetscInt)num_my_donors == num_multiroots, PETSC_COMM_SELF, PETSC_ERR_SUP, "Donor request does not match expected donors");
255     PetscCall(PetscSegBufferExtractInPlace(my_donor_faces[direction], &my_donors));
256     PetscCall(PetscMalloc1(vEnd - vStart, &my_donor_indices));
257     for (PetscInt i = 0; i < (PetscInt)num_my_donors; i++) {
258       PetscInt f = my_donors[i];
259       PetscInt num_points, *points = NULL, minv = PETSC_MAX_INT;
260       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
261       for (PetscInt j = 0; j < num_points; j++) {
262         PetscInt p = points[2 * j];
263         if (p < vStart || vEnd <= p) continue;
264         minv = PetscMin(minv, p);
265       }
266       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
267       PetscAssert(my_donor_degree[minv - vStart] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex not requested");
268       my_donor_indices[minv - vStart] = f;
269     }
270     PetscCall(PetscMalloc1(num_faces, &donor_indices));
271     PetscCall(PetscSFBcastBegin(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
272     PetscCall(PetscSFBcastEnd(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
273     PetscCall(PetscFree(my_donor_indices));
274     // Modify our leafs so they point to donor faces instead of donor minz. Additionally, give them indices as faces.
275     for (PetscInt i = 0; i < (PetscInt)num_faces; i++) leaf[i].index = donor_indices[i];
276     PetscCall(PetscFree(donor_indices));
277     PetscInt pStart, pEnd;
278     PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
279     PetscCall(PetscSFSetGraph(face_sfs[num_directions], pEnd - pStart, num_faces, faces, PETSC_COPY_VALUES, leaf, PETSC_OWN_POINTER));
280     {
281       char face_sf_name[PETSC_MAX_PATH_LEN];
282       PetscCall(PetscSNPrintf(face_sf_name, sizeof face_sf_name, "Z-order Isoperiodic Faces #%" PetscInt_FMT, num_directions));
283       PetscCall(PetscObjectSetName((PetscObject)face_sfs[num_directions], face_sf_name));
284     }
285 
286     transforms[num_directions][0][0]         = 1;
287     transforms[num_directions][1][1]         = 1;
288     transforms[num_directions][2][2]         = 1;
289     transforms[num_directions][3][3]         = 1;
290     transforms[num_directions][direction][3] = upper[direction] - lower[direction];
291     num_directions++;
292   }
293 
294   PetscCall(DMPlexSetIsoperiodicFaceSF(dm, num_directions, face_sfs));
295   PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, num_directions, (PetscScalar *)transforms));
296 
297   for (PetscInt i = 0; i < num_directions; i++) PetscCall(PetscSFDestroy(&face_sfs[i]));
298   PetscFunctionReturn(PETSC_SUCCESS);
299 }
300 
301 // This is a DMGlobalToLocalHook that applies the affine offsets. When extended for rotated periodicity, it'll need to
302 // apply a rotatonal transform and similar operations will be needed for fields (e.g., to rotate a velocity vector).
303 // We use this crude approach here so we don't have to write new GPU kernels yet.
304 static PetscErrorCode DMCoordAddPeriodicOffsets_Private(DM dm, Vec g, InsertMode mode, Vec l, void *ctx)
305 {
306   PetscFunctionBegin;
307   PetscCall(VecScatterBegin(dm->periodic.affine_to_local, dm->periodic.affine, l, ADD_VALUES, SCATTER_FORWARD));
308   PetscCall(VecScatterEnd(dm->periodic.affine_to_local, dm->periodic.affine, l, ADD_VALUES, SCATTER_FORWARD));
309   PetscFunctionReturn(PETSC_SUCCESS);
310 }
311 
312 // Start with an SF for a positive depth (e.g., faces) and create a new SF for matched closure. The caller must ensure
313 // that both the donor (root) face and the periodic (leaf) face have consistent orientation, meaning that their closures
314 // are isomorphic. It may be useful/necessary for this restriction to be loosened.
315 //
316 // Output Arguments:
317 //
318 // + closure_sf - augmented point SF (see `DMGetPointSF()`) that includes the faces and all points in its closure. This
319 //   can be used to create a global section and section SF.
320 // - is_points - array of index sets for just the points in the closure of `face_sf`. These may be used to apply an affine
321 //   transformation to periodic dofs; see DMPeriodicCoordinateSetUp_Internal().
322 //
323 static PetscErrorCode DMPlexCreateIsoperiodicPointSF_Private(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs, PetscSF *closure_sf, IS **is_points)
324 {
325   MPI_Comm           comm;
326   PetscMPIInt        rank;
327   PetscSF            point_sf;
328   PetscInt           nroots, nleaves;
329   const PetscInt    *filocal;
330   const PetscSFNode *firemote;
331 
332   PetscFunctionBegin;
333   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
334   PetscCallMPI(MPI_Comm_rank(comm, &rank));
335   PetscCall(DMGetPointSF(dm, &point_sf)); // Point SF has remote points
336   PetscCall(PetscMalloc1(num_face_sfs, is_points));
337 
338   for (PetscInt f = 0; f < num_face_sfs; f++) {
339     PetscSF   face_sf = face_sfs[f];
340     PetscInt *rootdata, *leafdata;
341 
342     PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, &firemote));
343     PetscCall(PetscCalloc2(2 * nroots, &rootdata, 2 * nroots, &leafdata));
344     for (PetscInt i = 0; i < nleaves; i++) {
345       PetscInt point = filocal[i], cl_size, *closure = NULL;
346       PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
347       leafdata[point] = cl_size - 1;
348       PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
349     }
350     PetscCall(PetscSFReduceBegin(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
351     PetscCall(PetscSFReduceEnd(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
352 
353     PetscInt root_offset = 0;
354     for (PetscInt p = 0; p < nroots; p++) {
355       const PetscInt *donor_dof = rootdata + nroots;
356       if (donor_dof[p] == 0) {
357         rootdata[2 * p]     = -1;
358         rootdata[2 * p + 1] = -1;
359         continue;
360       }
361       PetscInt  cl_size;
362       PetscInt *closure = NULL;
363       PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
364       // cl_size - 1 = points not including self
365       PetscAssert(donor_dof[p] == cl_size - 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Reduced leaf cone sizes do not match root cone sizes");
366       rootdata[2 * p]     = root_offset;
367       rootdata[2 * p + 1] = cl_size - 1;
368       root_offset += cl_size - 1;
369       PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
370     }
371     PetscCall(PetscSFBcastBegin(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
372     PetscCall(PetscSFBcastEnd(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
373     // Count how many leaves we need to communicate the closures
374     PetscInt leaf_offset = 0;
375     for (PetscInt i = 0; i < nleaves; i++) {
376       PetscInt point = filocal[i];
377       if (leafdata[2 * point + 1] < 0) continue;
378       leaf_offset += leafdata[2 * point + 1];
379     }
380 
381     PetscSFNode *closure_leaf;
382     PetscCall(PetscMalloc1(leaf_offset, &closure_leaf));
383     leaf_offset = 0;
384     for (PetscInt i = 0; i < nleaves; i++) {
385       PetscInt point   = filocal[i];
386       PetscInt cl_size = leafdata[2 * point + 1];
387       if (cl_size < 0) continue;
388       for (PetscInt j = 0; j < cl_size; j++) {
389         closure_leaf[leaf_offset].rank  = firemote[i].rank;
390         closure_leaf[leaf_offset].index = leafdata[2 * point] + j;
391         leaf_offset++;
392       }
393     }
394 
395     PetscSF sf_closure;
396     PetscCall(PetscSFCreate(comm, &sf_closure));
397     PetscCall(PetscSFSetGraph(sf_closure, root_offset, leaf_offset, NULL, PETSC_USE_POINTER, closure_leaf, PETSC_OWN_POINTER));
398 
399     PetscSFNode *leaf_donor_closure;
400     { // Pack root buffer with owner for every point in the root cones
401       PetscSFNode       *donor_closure;
402       const PetscInt    *pilocal;
403       const PetscSFNode *piremote;
404       PetscInt           npoints;
405 
406       PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, &pilocal, &piremote));
407       PetscCall(PetscCalloc1(root_offset, &donor_closure));
408       root_offset = 0;
409       for (PetscInt p = 0; p < nroots; p++) {
410         if (rootdata[2 * p] < 0) continue;
411         PetscInt  cl_size;
412         PetscInt *closure = NULL;
413         PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
414         for (PetscInt j = 1; j < cl_size; j++) {
415           PetscInt c = closure[2 * j];
416           if (pilocal) {
417             PetscInt found = -1;
418             if (npoints > 0) PetscCall(PetscFindInt(c, npoints, pilocal, &found));
419             if (found >= 0) {
420               donor_closure[root_offset++] = piremote[found];
421               continue;
422             }
423           }
424           // we own c
425           donor_closure[root_offset].rank  = rank;
426           donor_closure[root_offset].index = c;
427           root_offset++;
428         }
429         PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
430       }
431 
432       PetscCall(PetscMalloc1(leaf_offset, &leaf_donor_closure));
433       PetscCall(PetscSFBcastBegin(sf_closure, MPIU_2INT, donor_closure, leaf_donor_closure, MPI_REPLACE));
434       PetscCall(PetscSFBcastEnd(sf_closure, MPIU_2INT, donor_closure, leaf_donor_closure, MPI_REPLACE));
435       PetscCall(PetscSFDestroy(&sf_closure));
436       PetscCall(PetscFree(donor_closure));
437     }
438 
439     PetscSFNode *new_iremote;
440     PetscCall(PetscCalloc1(nroots, &new_iremote));
441     for (PetscInt i = 0; i < nroots; i++) new_iremote[i].rank = -1;
442     // Walk leaves and match vertices
443     leaf_offset = 0;
444     for (PetscInt i = 0; i < nleaves; i++) {
445       PetscInt  point   = filocal[i], cl_size;
446       PetscInt *closure = NULL;
447       PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
448       for (PetscInt j = 1; j < cl_size; j++) { // TODO: should we send donor edge orientations so we can flip for consistency?
449         PetscInt    c  = closure[2 * j];
450         PetscSFNode lc = leaf_donor_closure[leaf_offset];
451         // printf("[%d] face %d.%d: %d ?-- (%d,%d)\n", rank, point, j, c, lc.rank, lc.index);
452         if (new_iremote[c].rank == -1) {
453           new_iremote[c] = lc;
454         } else PetscCheck(new_iremote[c].rank == lc.rank && new_iremote[c].index == lc.index, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Mismatched cone ordering between faces");
455         leaf_offset++;
456       }
457       PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
458     }
459     PetscCall(PetscFree(leaf_donor_closure));
460 
461     // Include face points in closure SF
462     for (PetscInt i = 0; i < nleaves; i++) new_iremote[filocal[i]] = firemote[i];
463     // consolidate leaves
464     PetscInt num_new_leaves = 0;
465     for (PetscInt i = 0; i < nroots; i++) {
466       if (new_iremote[i].rank == -1) continue;
467       new_iremote[num_new_leaves] = new_iremote[i];
468       leafdata[num_new_leaves]    = i;
469       num_new_leaves++;
470     }
471     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, num_new_leaves, leafdata, PETSC_COPY_VALUES, &(*is_points)[f]));
472 
473     PetscSF csf;
474     PetscCall(PetscSFCreate(comm, &csf));
475     PetscCall(PetscSFSetGraph(csf, nroots, num_new_leaves, leafdata, PETSC_COPY_VALUES, new_iremote, PETSC_COPY_VALUES));
476     PetscCall(PetscFree(new_iremote)); // copy and delete because new_iremote is longer than it needs to be
477     PetscCall(PetscFree2(rootdata, leafdata));
478 
479     PetscInt npoints;
480     PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, NULL, NULL));
481     if (npoints < 0) { // empty point_sf
482       *closure_sf = csf;
483     } else {
484       PetscCall(PetscSFMerge(point_sf, csf, closure_sf));
485       PetscCall(PetscSFDestroy(&csf));
486     }
487     if (f > 0) PetscCall(PetscSFDestroy(&point_sf)); // Only destory if point_sf is from previous calls to PetscSFMerge
488     point_sf = *closure_sf;                          // Use combined point + isoperiodic SF to define point ownership for further face_sf
489   }
490   PetscCall(PetscObjectSetName((PetscObject)*closure_sf, "Composed Periodic Points"));
491   PetscFunctionReturn(PETSC_SUCCESS);
492 }
493 
494 static PetscErrorCode DMGetIsoperiodicPointSF_Plex(DM dm, PetscSF *sf)
495 {
496   DM_Plex *plex = (DM_Plex *)dm->data;
497 
498   PetscFunctionBegin;
499   if (!plex->periodic.composed_sf) PetscCall(DMPlexCreateIsoperiodicPointSF_Private(dm, plex->periodic.num_face_sfs, plex->periodic.face_sfs, &plex->periodic.composed_sf, &plex->periodic.periodic_points));
500   if (sf) *sf = plex->periodic.composed_sf;
501   PetscFunctionReturn(PETSC_SUCCESS);
502 }
503 
504 PetscErrorCode DMPlexMigrateIsoperiodicFaceSF_Internal(DM old_dm, DM dm, PetscSF sf_migration)
505 {
506   DM_Plex    *plex = (DM_Plex *)old_dm->data;
507   PetscSF     sf_point, *new_face_sfs;
508   PetscMPIInt rank;
509 
510   PetscFunctionBegin;
511   if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
512   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
513   PetscCall(DMGetPointSF(dm, &sf_point));
514   PetscCall(PetscMalloc1(plex->periodic.num_face_sfs, &new_face_sfs));
515 
516   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) {
517     PetscInt           old_npoints, new_npoints, old_nleaf, new_nleaf, point_nleaf;
518     PetscSFNode       *new_leafdata, *rootdata, *leafdata;
519     const PetscInt    *old_local, *point_local;
520     const PetscSFNode *old_remote, *point_remote;
521     PetscCall(PetscSFGetGraph(plex->periodic.face_sfs[f], &old_npoints, &old_nleaf, &old_local, &old_remote));
522     PetscCall(PetscSFGetGraph(sf_migration, NULL, &new_nleaf, NULL, NULL));
523     PetscCall(PetscSFGetGraph(sf_point, &new_npoints, &point_nleaf, &point_local, &point_remote));
524     PetscAssert(new_nleaf == new_npoints, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected migration leaf space to match new point root space");
525     PetscCall(PetscMalloc3(old_npoints, &rootdata, old_npoints, &leafdata, new_npoints, &new_leafdata));
526 
527     // Fill new_leafdata with new owners of all points
528     for (PetscInt i = 0; i < new_npoints; i++) {
529       new_leafdata[i].rank  = rank;
530       new_leafdata[i].index = i;
531     }
532     for (PetscInt i = 0; i < point_nleaf; i++) {
533       PetscInt j      = point_local[i];
534       new_leafdata[j] = point_remote[i];
535     }
536     // REPLACE is okay because every leaf agrees about the new owners
537     PetscCall(PetscSFReduceBegin(sf_migration, MPIU_2INT, new_leafdata, rootdata, MPI_REPLACE));
538     PetscCall(PetscSFReduceEnd(sf_migration, MPIU_2INT, new_leafdata, rootdata, MPI_REPLACE));
539     // rootdata now contains the new owners
540 
541     // Send to leaves of old space
542     for (PetscInt i = 0; i < old_npoints; i++) {
543       leafdata[i].rank  = -1;
544       leafdata[i].index = -1;
545     }
546     PetscCall(PetscSFBcastBegin(plex->periodic.face_sfs[f], MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
547     PetscCall(PetscSFBcastEnd(plex->periodic.face_sfs[f], MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
548 
549     // Send to new leaf space
550     PetscCall(PetscSFBcastBegin(sf_migration, MPIU_2INT, leafdata, new_leafdata, MPI_REPLACE));
551     PetscCall(PetscSFBcastEnd(sf_migration, MPIU_2INT, leafdata, new_leafdata, MPI_REPLACE));
552 
553     PetscInt     nface = 0, *new_local;
554     PetscSFNode *new_remote;
555     for (PetscInt i = 0; i < new_npoints; i++) nface += (new_leafdata[i].rank >= 0);
556     PetscCall(PetscMalloc1(nface, &new_local));
557     PetscCall(PetscMalloc1(nface, &new_remote));
558     nface = 0;
559     for (PetscInt i = 0; i < new_npoints; i++) {
560       if (new_leafdata[i].rank == -1) continue;
561       new_local[nface]  = i;
562       new_remote[nface] = new_leafdata[i];
563       nface++;
564     }
565     PetscCall(PetscFree3(rootdata, leafdata, new_leafdata));
566     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &new_face_sfs[f]));
567     PetscCall(PetscSFSetGraph(new_face_sfs[f], new_npoints, nface, new_local, PETSC_OWN_POINTER, new_remote, PETSC_OWN_POINTER));
568     {
569       char new_face_sf_name[PETSC_MAX_PATH_LEN];
570       PetscCall(PetscSNPrintf(new_face_sf_name, sizeof new_face_sf_name, "Migrated Isoperiodic Faces #%" PetscInt_FMT, f));
571       PetscCall(PetscObjectSetName((PetscObject)new_face_sfs[f], new_face_sf_name));
572     }
573   }
574 
575   PetscCall(DMPlexSetIsoperiodicFaceSF(dm, plex->periodic.num_face_sfs, new_face_sfs));
576   PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, plex->periodic.num_face_sfs, (PetscScalar *)plex->periodic.transform));
577   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) PetscCall(PetscSFDestroy(&new_face_sfs[f]));
578   PetscCall(PetscFree(new_face_sfs));
579   PetscFunctionReturn(PETSC_SUCCESS);
580 }
581 
582 PetscErrorCode DMPeriodicCoordinateSetUp_Internal(DM dm)
583 {
584   DM_Plex *plex = (DM_Plex *)dm->data;
585 
586   PetscFunctionBegin;
587   if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
588   PetscCall(DMGetIsoperiodicPointSF_Plex(dm, NULL));
589   PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex));
590 
591   PetscInt dim;
592   PetscCall(DMGetDimension(dm, &dim));
593   size_t count;
594   IS     isdof;
595   {
596     PetscInt        npoints;
597     const PetscInt *points;
598     IS              is = plex->periodic.periodic_points[0];
599     PetscSegBuffer  seg;
600     PetscSection    section;
601     PetscCall(DMGetLocalSection(dm, &section));
602     PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 32, &seg));
603     PetscCall(ISGetSize(is, &npoints));
604     PetscCall(ISGetIndices(is, &points));
605     for (PetscInt i = 0; i < npoints; i++) {
606       PetscInt point = points[i], off, dof;
607       PetscCall(PetscSectionGetOffset(section, point, &off));
608       PetscCall(PetscSectionGetDof(section, point, &dof));
609       PetscAssert(dof % dim == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected dof %" PetscInt_FMT " not divisible by dimension %" PetscInt_FMT, dof, dim);
610       for (PetscInt j = 0; j < dof / dim; j++) {
611         PetscInt *slot;
612         PetscCall(PetscSegBufferGetInts(seg, 1, &slot));
613         *slot = off / dim + j;
614       }
615     }
616     PetscInt *ind;
617     PetscCall(PetscSegBufferGetSize(seg, &count));
618     PetscCall(PetscSegBufferExtractAlloc(seg, &ind));
619     PetscCall(PetscSegBufferDestroy(&seg));
620     PetscCall(ISCreateBlock(PETSC_COMM_SELF, dim, count, ind, PETSC_OWN_POINTER, &isdof));
621   }
622   Vec        L, P;
623   VecType    vec_type;
624   VecScatter scatter;
625   PetscCall(DMGetLocalVector(dm, &L));
626   PetscCall(VecCreate(PETSC_COMM_SELF, &P));
627   PetscCall(VecSetSizes(P, count * dim, count * dim));
628   PetscCall(VecGetType(L, &vec_type));
629   PetscCall(VecSetType(P, vec_type));
630   PetscCall(VecScatterCreate(P, NULL, L, isdof, &scatter));
631   PetscCall(DMRestoreLocalVector(dm, &L));
632   PetscCall(ISDestroy(&isdof));
633 
634   {
635     PetscScalar *x;
636     PetscCall(VecGetArrayWrite(P, &x));
637     for (PetscInt i = 0; i < (PetscInt)count; i++) {
638       for (PetscInt j = 0; j < dim; j++) x[i * dim + j] = plex->periodic.transform[0][j][3];
639     }
640     PetscCall(VecRestoreArrayWrite(P, &x));
641   }
642 
643   dm->periodic.affine_to_local = scatter;
644   dm->periodic.affine          = P;
645   PetscCall(DMGlobalToLocalHookAdd(dm, NULL, DMCoordAddPeriodicOffsets_Private, NULL));
646   PetscFunctionReturn(PETSC_SUCCESS);
647 }
648 
649 // We'll just orient all the edges, though only periodic boundary edges need orientation
650 static PetscErrorCode DMPlexOrientPositiveEdges_Private(DM dm)
651 {
652   PetscInt dim, eStart, eEnd;
653 
654   PetscFunctionBegin;
655   PetscCall(DMGetDimension(dm, &dim));
656   if (dim < 3) PetscFunctionReturn(PETSC_SUCCESS); // not necessary
657   PetscCall(DMPlexGetDepthStratum(dm, 1, &eStart, &eEnd));
658   for (PetscInt e = eStart; e < eEnd; e++) {
659     const PetscInt *cone;
660     PetscCall(DMPlexGetCone(dm, e, &cone));
661     if (cone[0] > cone[1]) PetscCall(DMPlexOrientPoint(dm, e, -1));
662   }
663   PetscFunctionReturn(PETSC_SUCCESS);
664 }
665 
666 PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Internal(DM dm, PetscInt dim, const PetscInt faces[], const PetscReal lower[], const PetscReal upper[], const DMBoundaryType periodicity[], PetscBool interpolate)
667 {
668   PetscInt  eextent[3] = {1, 1, 1}, vextent[3] = {1, 1, 1};
669   const Ijk closure_1[] = {
670     {0, 0, 0},
671     {1, 0, 0},
672   };
673   const Ijk closure_2[] = {
674     {0, 0, 0},
675     {1, 0, 0},
676     {1, 1, 0},
677     {0, 1, 0},
678   };
679   const Ijk closure_3[] = {
680     {0, 0, 0},
681     {0, 1, 0},
682     {1, 1, 0},
683     {1, 0, 0},
684     {0, 0, 1},
685     {1, 0, 1},
686     {1, 1, 1},
687     {0, 1, 1},
688   };
689   const Ijk *const closure_dim[] = {NULL, closure_1, closure_2, closure_3};
690   // This must be kept consistent with DMPlexCreateCubeMesh_Internal
691   const PetscInt        face_marker_1[]   = {1, 2};
692   const PetscInt        face_marker_2[]   = {4, 2, 1, 3};
693   const PetscInt        face_marker_3[]   = {6, 5, 3, 4, 1, 2};
694   const PetscInt *const face_marker_dim[] = {NULL, face_marker_1, face_marker_2, face_marker_3};
695   // Orient faces so the normal is in the positive axis and the first vertex is the one closest to zero.
696   // These orientations can be determined by examining cones of a reference quad and hex element.
697   const PetscInt        face_orient_1[]   = {0, 0};
698   const PetscInt        face_orient_2[]   = {-1, 0, 0, -1};
699   const PetscInt        face_orient_3[]   = {-2, 0, -2, 1, -2, 0};
700   const PetscInt *const face_orient_dim[] = {NULL, face_orient_1, face_orient_2, face_orient_3};
701 
702   PetscFunctionBegin;
703   PetscAssertPointer(dm, 1);
704   PetscValidLogicalCollectiveInt(dm, dim, 2);
705   PetscCall(DMSetDimension(dm, dim));
706   PetscMPIInt rank, size;
707   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size));
708   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
709   for (PetscInt i = 0; i < dim; i++) {
710     eextent[i] = faces[i];
711     vextent[i] = faces[i] + 1;
712   }
713   ZLayout layout;
714   PetscCall(ZLayoutCreate(size, eextent, vextent, &layout));
715   PetscZSet vset; // set of all vertices in the closure of the owned elements
716   PetscCall(PetscZSetCreate(&vset));
717   PetscInt local_elems = 0;
718   for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
719     Ijk loc = ZCodeSplit(z);
720     if (IjkActive(layout.vextent, loc)) PetscCall(PetscZSetAdd(vset, z));
721     else {
722       z += ZStepOct(z);
723       continue;
724     }
725     if (IjkActive(layout.eextent, loc)) {
726       local_elems++;
727       // Add all neighboring vertices to set
728       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
729         Ijk   inc  = closure_dim[dim][n];
730         Ijk   nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
731         ZCode v    = ZEncode(nloc);
732         PetscCall(PetscZSetAdd(vset, v));
733       }
734     }
735   }
736   PetscInt local_verts, off = 0;
737   ZCode   *vert_z;
738   PetscCall(PetscZSetGetSize(vset, &local_verts));
739   PetscCall(PetscMalloc1(local_verts, &vert_z));
740   PetscCall(PetscZSetGetElems(vset, &off, vert_z));
741   PetscCall(PetscZSetDestroy(&vset));
742   // ZCode is unsigned for bitwise convenience, but highest bit should never be set, so can interpret as signed
743   PetscCall(PetscSortInt64(local_verts, (PetscInt64 *)vert_z));
744 
745   PetscCall(DMPlexSetChart(dm, 0, local_elems + local_verts));
746   for (PetscInt e = 0; e < local_elems; e++) PetscCall(DMPlexSetConeSize(dm, e, PetscPowInt(2, dim)));
747   PetscCall(DMSetUp(dm));
748   {
749     PetscInt e = 0;
750     for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
751       Ijk loc = ZCodeSplit(z);
752       if (!IjkActive(layout.eextent, loc)) {
753         z += ZStepOct(z);
754         continue;
755       }
756       PetscInt cone[8], orient[8] = {0};
757       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
758         Ijk      inc  = closure_dim[dim][n];
759         Ijk      nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
760         ZCode    v    = ZEncode(nloc);
761         PetscInt ci   = ZCodeFind(v, local_verts, vert_z);
762         PetscAssert(ci >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Did not find neighbor vertex in set");
763         cone[n] = local_elems + ci;
764       }
765       PetscCall(DMPlexSetCone(dm, e, cone));
766       PetscCall(DMPlexSetConeOrientation(dm, e, orient));
767       e++;
768     }
769   }
770 
771   PetscCall(DMPlexSymmetrize(dm));
772   PetscCall(DMPlexStratify(dm));
773 
774   { // Create point SF
775     PetscSF sf;
776     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &sf));
777     PetscInt owned_verts = ZCodeFind(layout.zstarts[rank + 1], local_verts, vert_z);
778     if (owned_verts < 0) owned_verts = -(owned_verts + 1); // We don't care whether the key was found
779     PetscInt     num_ghosts = local_verts - owned_verts;   // Due to sorting, owned vertices always come first
780     PetscInt    *local_ghosts;
781     PetscSFNode *ghosts;
782     PetscCall(PetscMalloc1(num_ghosts, &local_ghosts));
783     PetscCall(PetscMalloc1(num_ghosts, &ghosts));
784     for (PetscInt i = 0; i < num_ghosts;) {
785       ZCode    z           = vert_z[owned_verts + i];
786       PetscInt remote_rank = ZCodeFind(z, size + 1, layout.zstarts), remote_count = 0;
787       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
788       // We have a new remote rank; find all the ghost indices (which are contiguous in vert_z)
789 
790       // Count the elements on remote_rank
791       PetscInt remote_elem = ZLayoutElementsOnRank(&layout, remote_rank);
792 
793       // Traverse vertices and make ghost links
794       for (ZCode rz = layout.zstarts[remote_rank]; rz < layout.zstarts[remote_rank + 1]; rz++) {
795         Ijk loc = ZCodeSplit(rz);
796         if (rz == z) {
797           local_ghosts[i] = local_elems + owned_verts + i;
798           ghosts[i].rank  = remote_rank;
799           ghosts[i].index = remote_elem + remote_count;
800           i++;
801           if (i == num_ghosts) break;
802           z = vert_z[owned_verts + i];
803         }
804         if (IjkActive(layout.vextent, loc)) remote_count++;
805         else rz += ZStepOct(rz);
806       }
807     }
808     PetscCall(PetscSFSetGraph(sf, local_elems + local_verts, num_ghosts, local_ghosts, PETSC_OWN_POINTER, ghosts, PETSC_OWN_POINTER));
809     PetscCall(PetscObjectSetName((PetscObject)sf, "SFC Point SF"));
810     PetscCall(DMSetPointSF(dm, sf));
811     PetscCall(PetscSFDestroy(&sf));
812   }
813   {
814     Vec          coordinates;
815     PetscScalar *coords;
816     PetscSection coord_section;
817     PetscInt     coord_size;
818     PetscCall(DMGetCoordinateSection(dm, &coord_section));
819     PetscCall(PetscSectionSetNumFields(coord_section, 1));
820     PetscCall(PetscSectionSetFieldComponents(coord_section, 0, dim));
821     PetscCall(PetscSectionSetChart(coord_section, local_elems, local_elems + local_verts));
822     for (PetscInt v = 0; v < local_verts; v++) {
823       PetscInt point = local_elems + v;
824       PetscCall(PetscSectionSetDof(coord_section, point, dim));
825       PetscCall(PetscSectionSetFieldDof(coord_section, point, 0, dim));
826     }
827     PetscCall(PetscSectionSetUp(coord_section));
828     PetscCall(PetscSectionGetStorageSize(coord_section, &coord_size));
829     PetscCall(VecCreate(PETSC_COMM_SELF, &coordinates));
830     PetscCall(PetscObjectSetName((PetscObject)coordinates, "coordinates"));
831     PetscCall(VecSetSizes(coordinates, coord_size, PETSC_DETERMINE));
832     PetscCall(VecSetBlockSize(coordinates, dim));
833     PetscCall(VecSetType(coordinates, VECSTANDARD));
834     PetscCall(VecGetArray(coordinates, &coords));
835     for (PetscInt v = 0; v < local_verts; v++) {
836       Ijk loc             = ZCodeSplit(vert_z[v]);
837       coords[v * dim + 0] = lower[0] + loc.i * (upper[0] - lower[0]) / layout.eextent.i;
838       if (dim > 1) coords[v * dim + 1] = lower[1] + loc.j * (upper[1] - lower[1]) / layout.eextent.j;
839       if (dim > 2) coords[v * dim + 2] = lower[2] + loc.k * (upper[2] - lower[2]) / layout.eextent.k;
840     }
841     PetscCall(VecRestoreArray(coordinates, &coords));
842     PetscCall(DMSetCoordinatesLocal(dm, coordinates));
843     PetscCall(VecDestroy(&coordinates));
844   }
845   if (interpolate) {
846     PetscCall(DMPlexInterpolateInPlace_Internal(dm));
847     // It's currently necessary to orient the donor and periodic edges consistently. An easy way to ensure that is ot
848     // give all edges positive orientation. Since vertices are created in Z-order, all ranks will agree about the
849     // ordering cone[0] < cone[1]. This is overkill and it would be nice to remove this preparation and make
850     // DMPlexCreateIsoperiodicClosureSF_Private() more resilient, so it fixes any inconsistent orientations. That might
851     // be needed in a general CGNS reader, for example.
852     PetscCall(DMPlexOrientPositiveEdges_Private(dm));
853 
854     DMLabel label;
855     PetscCall(DMCreateLabel(dm, "Face Sets"));
856     PetscCall(DMGetLabel(dm, "Face Sets", &label));
857     PetscSegBuffer per_faces[3], donor_face_closure[3], my_donor_faces[3];
858     for (PetscInt i = 0; i < 3; i++) {
859       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &per_faces[i]));
860       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &my_donor_faces[i]));
861       PetscCall(PetscSegBufferCreate(sizeof(ZCode), 64 * PetscPowInt(2, dim), &donor_face_closure[i]));
862     }
863     PetscInt fStart, fEnd, vStart, vEnd;
864     PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
865     PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
866     for (PetscInt f = fStart; f < fEnd; f++) {
867       PetscInt npoints, *points = NULL, num_fverts = 0, fverts[8];
868       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
869       PetscInt bc_count[6] = {0};
870       for (PetscInt i = 0; i < npoints; i++) {
871         PetscInt p = points[2 * i];
872         if (p < vStart || vEnd <= p) continue;
873         fverts[num_fverts++] = p;
874         Ijk loc              = ZCodeSplit(vert_z[p - vStart]);
875         // Convention here matches DMPlexCreateCubeMesh_Internal
876         bc_count[0] += loc.i == 0;
877         bc_count[1] += loc.i == layout.vextent.i - 1;
878         bc_count[2] += loc.j == 0;
879         bc_count[3] += loc.j == layout.vextent.j - 1;
880         bc_count[4] += loc.k == 0;
881         bc_count[5] += loc.k == layout.vextent.k - 1;
882       }
883       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
884       for (PetscInt bc = 0, bc_match = 0; bc < 2 * dim; bc++) {
885         if (bc_count[bc] == PetscPowInt(2, dim - 1)) {
886           PetscCall(DMPlexOrientPoint(dm, f, face_orient_dim[dim][bc]));
887           if (periodicity[bc / 2] == DM_BOUNDARY_PERIODIC) {
888             PetscInt *put;
889             if (bc % 2 == 0) { // donor face; no label
890               PetscCall(PetscSegBufferGet(my_donor_faces[bc / 2], 1, &put));
891               *put = f;
892             } else { // periodic face
893               PetscCall(PetscSegBufferGet(per_faces[bc / 2], 1, &put));
894               *put = f;
895               ZCode *zput;
896               PetscCall(PetscSegBufferGet(donor_face_closure[bc / 2], num_fverts, &zput));
897               for (PetscInt i = 0; i < num_fverts; i++) {
898                 Ijk loc = ZCodeSplit(vert_z[fverts[i] - vStart]);
899                 switch (bc / 2) {
900                 case 0:
901                   loc.i = 0;
902                   break;
903                 case 1:
904                   loc.j = 0;
905                   break;
906                 case 2:
907                   loc.k = 0;
908                   break;
909                 }
910                 *zput++ = ZEncode(loc);
911               }
912             }
913             continue;
914           }
915           PetscAssert(bc_match == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Face matches multiple face sets");
916           PetscCall(DMLabelSetValue(label, f, face_marker_dim[dim][bc]));
917           bc_match++;
918         }
919       }
920     }
921     // Ensure that the Coordinate DM has our new boundary labels
922     DM cdm;
923     PetscCall(DMGetCoordinateDM(dm, &cdm));
924     PetscCall(DMCopyLabels(dm, cdm, PETSC_COPY_VALUES, PETSC_FALSE, DM_COPY_LABELS_FAIL));
925     if (periodicity[0] == DM_BOUNDARY_PERIODIC || (dim > 1 && periodicity[1] == DM_BOUNDARY_PERIODIC) || (dim > 2 && periodicity[2] == DM_BOUNDARY_PERIODIC)) {
926       PetscCall(DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(dm, &layout, vert_z, per_faces, lower, upper, periodicity, donor_face_closure, my_donor_faces));
927     }
928     for (PetscInt i = 0; i < 3; i++) {
929       PetscCall(PetscSegBufferDestroy(&per_faces[i]));
930       PetscCall(PetscSegBufferDestroy(&donor_face_closure[i]));
931       PetscCall(PetscSegBufferDestroy(&my_donor_faces[i]));
932     }
933   }
934   PetscCall(PetscFree(layout.zstarts));
935   PetscCall(PetscFree(vert_z));
936   PetscFunctionReturn(PETSC_SUCCESS);
937 }
938 
939 /*@
940   DMPlexSetIsoperiodicFaceSF - Express periodicity from an existing mesh
941 
942   Logically Collective
943 
944   Input Parameters:
945 + dm           - The `DMPLEX` on which to set periodicity
946 . num_face_sfs - Number of `PetscSF`s in `face_sfs`
947 - face_sfs     - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.
948 
949   Level: advanced
950 
951   Note:
952   One can use `-dm_plex_shape zbox` to use this mode of periodicity, wherein the periodic points are distinct both globally
953   and locally, but are paired when creating a global dof space.
954 
955 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexGetIsoperiodicFaceSF()`
956 @*/
957 PetscErrorCode DMPlexSetIsoperiodicFaceSF(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs)
958 {
959   DM_Plex *plex = (DM_Plex *)dm->data;
960 
961   PetscFunctionBegin;
962   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
963   if (face_sfs) PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex));
964   if (face_sfs == plex->periodic.face_sfs && num_face_sfs == plex->periodic.num_face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
965 
966   for (PetscInt i = 0; i < num_face_sfs; i++) PetscCall(PetscObjectReference((PetscObject)face_sfs[i]));
967 
968   if (plex->periodic.num_face_sfs > 0) {
969     for (PetscInt i = 0; i < plex->periodic.num_face_sfs; i++) PetscCall(PetscSFDestroy(&plex->periodic.face_sfs[i]));
970     PetscCall(PetscFree(plex->periodic.face_sfs));
971   }
972 
973   plex->periodic.num_face_sfs = num_face_sfs;
974   PetscCall(PetscCalloc1(num_face_sfs, &plex->periodic.face_sfs));
975   for (PetscInt i = 0; i < num_face_sfs; i++) plex->periodic.face_sfs[i] = face_sfs[i];
976 
977   DM cdm = dm->coordinates[0].dm; // Can't DMGetCoordinateDM because it automatically creates one
978   if (cdm) {
979     PetscCall(DMPlexSetIsoperiodicFaceSF(cdm, num_face_sfs, face_sfs));
980     if (face_sfs) cdm->periodic.setup = DMPeriodicCoordinateSetUp_Internal;
981   }
982   PetscFunctionReturn(PETSC_SUCCESS);
983 }
984 
985 /*@C
986   DMPlexGetIsoperiodicFaceSF - Obtain periodicity for a mesh
987 
988   Logically Collective
989 
990   Input Parameter:
991 . dm - The `DMPLEX` for which to obtain periodic relation
992 
993   Output Parameter:
994 + num_face_sfs - Number of `PetscSF`s in the array
995 - face_sfs     - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.
996 
997   Level: advanced
998 
999 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()`
1000 @*/
1001 PetscErrorCode DMPlexGetIsoperiodicFaceSF(DM dm, PetscInt *num_face_sfs, const PetscSF **face_sfs)
1002 {
1003   DM_Plex *plex = (DM_Plex *)dm->data;
1004 
1005   PetscFunctionBegin;
1006   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1007   *face_sfs     = plex->periodic.face_sfs;
1008   *num_face_sfs = plex->periodic.num_face_sfs;
1009   PetscFunctionReturn(PETSC_SUCCESS);
1010 }
1011 
1012 /*@C
1013   DMPlexSetIsoperiodicFaceTransform - set geometric transform from donor to periodic points
1014 
1015   Logically Collective
1016 
1017   Input Parameters:
1018 + dm - `DMPLEX` that has been configured with `DMPlexSetIsoperiodicFaceSF()`
1019 . n  - Number of transforms in array
1020 - t  - Array of 4x4 affine transformation basis.
1021 
1022   Level: advanced
1023 
1024   Notes:
1025   Affine transforms are 4x4 matrices in which the leading 3x3 block expresses a rotation (or identity for no rotation),
1026   the last column contains a translation vector, and the bottom row is all zero except the last entry, which must always
1027   be 1. This representation is common in geometric modeling and allows affine transformations to be composed using
1028   simple matrix multiplication.
1029 
1030   Although the interface accepts a general affine transform, only affine translation is supported at present.
1031 
1032   Developer Notes:
1033   This interface should be replaced by making BasisTransform public, expanding it to support affine representations, and
1034   adding GPU implementations to apply the G2L/L2G transforms.
1035 
1036 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()`
1037 @*/
1038 PetscErrorCode DMPlexSetIsoperiodicFaceTransform(DM dm, PetscInt n, const PetscScalar *t)
1039 {
1040   DM_Plex *plex = (DM_Plex *)dm->data;
1041 
1042   PetscFunctionBegin;
1043   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1044   PetscCheck(n == plex->periodic.num_face_sfs, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Number of transforms (%" PetscInt_FMT ") must equal number of isoperiodc face SFs (%" PetscInt_FMT ")", n, plex->periodic.num_face_sfs);
1045 
1046   PetscCall(PetscMalloc1(n, &plex->periodic.transform));
1047   for (PetscInt i = 0; i < n; i++) {
1048     for (PetscInt j = 0; j < 4; j++) {
1049       for (PetscInt k = 0; k < 4; k++) {
1050         PetscCheck(j != k || t[i * 16 + j * 4 + k] == 1., PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Rotated transforms not supported");
1051         plex->periodic.transform[i][j][k] = t[i * 16 + j * 4 + k];
1052       }
1053     }
1054   }
1055   PetscFunctionReturn(PETSC_SUCCESS);
1056 }
1057