xref: /petsc/src/dm/impls/plex/plexsfc.c (revision 563af49cd0f3cb78117d53849b118ed24e6d9423)
1 #include <petsc/private/dmpleximpl.h> /*I   "petscdmplex.h"   I*/
2 #include <petscsf.h>
3 #include <petsc/private/hashset.h>
4 
5 typedef uint64_t ZCode;
6 
7 PETSC_HASH_SET(ZSet, ZCode, PetscHash_UInt64, PetscHashEqual)
8 
9 typedef struct {
10   PetscInt i, j, k;
11 } Ijk;
12 
13 typedef struct {
14   Ijk         eextent;
15   Ijk         vextent;
16   PetscMPIInt comm_size;
17   ZCode      *zstarts;
18 } ZLayout;
19 
20 // Magic numbers taken from https://stackoverflow.com/a/18528775/7564988
21 static unsigned ZCodeSplit1(ZCode z)
22 {
23   z &= 0x1249249249249249;
24   z = (z | z >> 2) & 0x10c30c30c30c30c3;
25   z = (z | z >> 4) & 0x100f00f00f00f00f;
26   z = (z | z >> 8) & 0x1f0000ff0000ff;
27   z = (z | z >> 16) & 0x1f00000000ffff;
28   z = (z | z >> 32) & 0x1fffff;
29   return (unsigned)z;
30 }
31 
32 static ZCode ZEncode1(unsigned t)
33 {
34   ZCode z = t;
35   z &= 0x1fffff;
36   z = (z | z << 32) & 0x1f00000000ffff;
37   z = (z | z << 16) & 0x1f0000ff0000ff;
38   z = (z | z << 8) & 0x100f00f00f00f00f;
39   z = (z | z << 4) & 0x10c30c30c30c30c3;
40   z = (z | z << 2) & 0x1249249249249249;
41   return z;
42 }
43 
44 static Ijk ZCodeSplit(ZCode z)
45 {
46   Ijk c;
47   c.i = ZCodeSplit1(z >> 2);
48   c.j = ZCodeSplit1(z >> 1);
49   c.k = ZCodeSplit1(z >> 0);
50   return c;
51 }
52 
53 static ZCode ZEncode(Ijk c)
54 {
55   ZCode z = (ZEncode1((unsigned int)c.i) << 2) | (ZEncode1((unsigned int)c.j) << 1) | ZEncode1((unsigned int)c.k);
56   return z;
57 }
58 
59 static PetscBool IjkActive(Ijk extent, Ijk l)
60 {
61   if (l.i < extent.i && l.j < extent.j && l.k < extent.k) return PETSC_TRUE;
62   return PETSC_FALSE;
63 }
64 
65 // If z is not the base of an octet (last 3 bits 0), return 0.
66 //
67 // If z is the base of an octet, we recursively grow to the biggest structured octet. This is typically useful when a z
68 // is outside the domain and we wish to skip a (possibly recursively large) octet to find our next interesting point.
69 static ZCode ZStepOct(ZCode z)
70 {
71   if (PetscUnlikely(z == 0)) return 0; // Infinite loop below if z == 0
72   ZCode step = 07;
73   for (; (z & step) == 0; step = (step << 3) | 07) { }
74   return step >> 3;
75 }
76 
77 // Since element/vertex box extents are typically not equal powers of 2, Z codes that lie within the domain are not contiguous.
78 static PetscErrorCode ZLayoutCreate(PetscMPIInt size, const PetscInt eextent[3], const PetscInt vextent[3], ZLayout *layout)
79 {
80   PetscFunctionBegin;
81   layout->eextent.i = eextent[0];
82   layout->eextent.j = eextent[1];
83   layout->eextent.k = eextent[2];
84   layout->vextent.i = vextent[0];
85   layout->vextent.j = vextent[1];
86   layout->vextent.k = vextent[2];
87   layout->comm_size = size;
88   layout->zstarts   = NULL;
89   PetscCall(PetscMalloc1(size + 1, &layout->zstarts));
90 
91   PetscInt total_elems = eextent[0] * eextent[1] * eextent[2];
92   ZCode    z           = 0;
93   layout->zstarts[0]   = 0;
94   // This loop traverses all vertices in the global domain, so is worth making fast. We use ZStepBound
95   for (PetscMPIInt r = 0; r < size; r++) {
96     PetscInt elems_needed = (total_elems / size) + (total_elems % size > r), count;
97     for (count = 0; count < elems_needed; z++) {
98       ZCode skip = ZStepOct(z); // optimistically attempt a longer step
99       for (ZCode s = skip;; s >>= 3) {
100         Ijk trial = ZCodeSplit(z + s);
101         if (IjkActive(layout->eextent, trial)) {
102           while (count + s + 1 > (ZCode)elems_needed) s >>= 3; // Shrink the octet
103           count += s + 1;
104           z += s;
105           break;
106         }
107         if (s == 0) { // the whole skip octet is inactive
108           z += skip;
109           break;
110         }
111       }
112     }
113     // Pick up any extra vertices in the Z ordering before the next rank's first owned element.
114     //
115     // This leads to poorly balanced vertices when eextent is a power of 2, since all the fringe vertices end up
116     // on the last rank. A possible solution is to balance the Z-order vertices independently from the cells, which will
117     // result in a lot of element closures being remote. We could finish marking boundary conditions, then do a round of
118     // vertex ownership smoothing (which would reorder and redistribute vertices without touching element distribution).
119     // Another would be to have an analytic ownership criteria for vertices in the fringe veextent - eextent. This would
120     // complicate the job of identifying an owner and its offset.
121     //
122     // The current recommended approach is to let `-dm_distribute 1` (default) resolve vertex ownership. This is
123     // *mandatory* with isoperiodicity (except in special cases) to remove standed vertices from local spaces. Here's
124     // the issue:
125     //
126     // Consider this partition on rank 0 (left) and rank 1.
127     //
128     //    4 --------  5 -- 14 --10 -- 21 --11
129     //                |          |          |
130     // 7 -- 16 --  8  |          |          |
131     // |           |  3 -------  7 -------  9
132     // |           |             |          |
133     // 4 --------  6 ------ 10   |          |
134     // |           |         |   6 -- 16 -- 8
135     // |           |         |
136     // 3 ---11---  5 --18--  9
137     //
138     // The periodic face SF looks like
139     // [0] Number of roots=21, leaves=1, remote ranks=1
140     // [0] 16 <- (0,11)
141     // [1] Number of roots=22, leaves=2, remote ranks=2
142     // [1] 14 <- (0,18)
143     // [1] 21 <- (1,16)
144     //
145     // In handling face (0,16), rank 0 learns that (0,7) and (0,8) map to (0,3) and (0,5) respectively, thus we won't use
146     // the point SF links to (1,4) and (1,5). Rank 1 learns about the periodic mapping of (1,5) while handling face
147     // (1,14), but never learns that vertex (1,4) has been mapped to (0,3) by face (0,16).
148     //
149     // We can relatively easily inform vertex (1,4) of this mapping, but it stays in rank 1's local space despite not
150     // being in the closure and thus not being contributed to. This would be mostly harmless except that some viewer
151     // routines expect all local points to be somehow significant. It is not easy to analytically remove the (1,4)
152     // vertex because the point SF and isoperiodic face SF would need to be updated to account for removal of the
153     // stranded vertices.
154     for (; z <= ZEncode(layout->vextent); z++) {
155       Ijk loc = ZCodeSplit(z);
156       if (IjkActive(layout->eextent, loc)) break;
157       z += ZStepOct(z);
158     }
159     layout->zstarts[r + 1] = z;
160   }
161   layout->zstarts[size] = ZEncode(layout->vextent);
162   PetscFunctionReturn(PETSC_SUCCESS);
163 }
164 
165 static PetscInt ZLayoutElementsOnRank(const ZLayout *layout, PetscMPIInt rank)
166 {
167   PetscInt remote_elem = 0;
168   for (ZCode rz = layout->zstarts[rank]; rz < layout->zstarts[rank + 1]; rz++) {
169     Ijk loc = ZCodeSplit(rz);
170     if (IjkActive(layout->eextent, loc)) remote_elem++;
171     else rz += ZStepOct(rz);
172   }
173   return remote_elem;
174 }
175 
176 static PetscInt ZCodeFind(ZCode key, PetscInt n, const ZCode X[])
177 {
178   PetscInt lo = 0, hi = n;
179 
180   if (n == 0) return -1;
181   while (hi - lo > 1) {
182     PetscInt mid = lo + (hi - lo) / 2;
183     if (key < X[mid]) hi = mid;
184     else lo = mid;
185   }
186   return key == X[lo] ? lo : -(lo + (key > X[lo]) + 1);
187 }
188 
189 static PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(DM dm, const ZLayout *layout, const ZCode *vert_z, PetscSegBuffer per_faces[3], const PetscReal *lower, const PetscReal *upper, const DMBoundaryType *periodicity, PetscSegBuffer donor_face_closure[3], PetscSegBuffer my_donor_faces[3])
190 {
191   MPI_Comm    comm;
192   PetscInt    dim, vStart, vEnd;
193   PetscMPIInt size;
194   PetscSF     face_sfs[3];
195   PetscScalar transforms[3][4][4] = {{{0}}};
196 
197   PetscFunctionBegin;
198   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
199   PetscCallMPI(MPI_Comm_size(comm, &size));
200   PetscCall(DMGetDimension(dm, &dim));
201   const PetscInt csize = PetscPowInt(2, dim - 1);
202   PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
203 
204   PetscInt num_directions = 0;
205   for (PetscInt direction = 0; direction < dim; direction++) {
206     PetscCount   num_faces;
207     PetscInt    *faces;
208     ZCode       *donor_verts, *donor_minz;
209     PetscSFNode *leaf;
210     PetscCount   num_multiroots = 0;
211     PetscInt     pStart, pEnd;
212     PetscBool    sorted;
213     PetscInt     inum_faces;
214 
215     if (periodicity[direction] != DM_BOUNDARY_PERIODIC) continue;
216     PetscCall(PetscSegBufferGetSize(per_faces[direction], &num_faces));
217     PetscCall(PetscSegBufferExtractInPlace(per_faces[direction], &faces));
218     PetscCall(PetscSegBufferExtractInPlace(donor_face_closure[direction], &donor_verts));
219     PetscCall(PetscMalloc1(num_faces, &donor_minz));
220     PetscCall(PetscMalloc1(num_faces, &leaf));
221     for (PetscCount i = 0; i < num_faces; i++) {
222       ZCode minz = donor_verts[i * csize];
223 
224       for (PetscInt j = 1; j < csize; j++) minz = PetscMin(minz, donor_verts[i * csize + j]);
225       donor_minz[i] = minz;
226     }
227     PetscCall(PetscIntCast(num_faces, &inum_faces));
228     PetscCall(PetscSortedInt64(inum_faces, (const PetscInt64 *)donor_minz, &sorted));
229     // If a donor vertex were chosen to broker multiple faces, we would have a logic error.
230     // Checking for sorting is a cheap check that there are no duplicates.
231     PetscCheck(sorted, PETSC_COMM_SELF, PETSC_ERR_PLIB, "minz not sorted; possible duplicates not checked");
232     for (PetscCount i = 0; i < num_faces;) {
233       ZCode       z           = donor_minz[i];
234       PetscMPIInt remote_rank = (PetscMPIInt)ZCodeFind(z, size + 1, layout->zstarts), remote_count = 0;
235 
236       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
237       // Process all the vertices on this rank
238       for (ZCode rz = layout->zstarts[remote_rank]; rz < layout->zstarts[remote_rank + 1]; rz++) {
239         Ijk loc = ZCodeSplit(rz);
240 
241         if (rz == z) {
242           leaf[i].rank  = remote_rank;
243           leaf[i].index = remote_count;
244           i++;
245           if (i == num_faces) break;
246           z = donor_minz[i];
247         }
248         if (IjkActive(layout->vextent, loc)) remote_count++;
249       }
250     }
251     PetscCall(PetscFree(donor_minz));
252     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &face_sfs[num_directions]));
253     PetscCall(PetscSFSetGraph(face_sfs[num_directions], vEnd - vStart, inum_faces, NULL, PETSC_USE_POINTER, leaf, PETSC_USE_POINTER));
254     const PetscInt *my_donor_degree;
255     PetscCall(PetscSFComputeDegreeBegin(face_sfs[num_directions], &my_donor_degree));
256     PetscCall(PetscSFComputeDegreeEnd(face_sfs[num_directions], &my_donor_degree));
257 
258     for (PetscInt i = 0; i < vEnd - vStart; i++) {
259       num_multiroots += my_donor_degree[i];
260       if (my_donor_degree[i] == 0) continue;
261       PetscAssert(my_donor_degree[i] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex has multiple faces");
262     }
263     PetscInt  *my_donors, *donor_indices, *my_donor_indices;
264     PetscCount num_my_donors;
265 
266     PetscCall(PetscSegBufferGetSize(my_donor_faces[direction], &num_my_donors));
267     PetscCheck(num_my_donors == num_multiroots, PETSC_COMM_SELF, PETSC_ERR_SUP, "Donor request (%" PetscCount_FMT ") does not match expected donors (%" PetscCount_FMT ")", num_my_donors, num_multiroots);
268     PetscCall(PetscSegBufferExtractInPlace(my_donor_faces[direction], &my_donors));
269     PetscCall(PetscMalloc1(vEnd - vStart, &my_donor_indices));
270     for (PetscCount i = 0; i < num_my_donors; i++) {
271       PetscInt f = my_donors[i];
272       PetscInt num_points, *points = NULL, minv = PETSC_INT_MAX;
273 
274       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
275       for (PetscInt j = 0; j < num_points; j++) {
276         PetscInt p = points[2 * j];
277         if (p < vStart || vEnd <= p) continue;
278         minv = PetscMin(minv, p);
279       }
280       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
281       PetscAssert(my_donor_degree[minv - vStart] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex not requested");
282       my_donor_indices[minv - vStart] = f;
283     }
284     PetscCall(PetscMalloc1(num_faces, &donor_indices));
285     PetscCall(PetscSFBcastBegin(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
286     PetscCall(PetscSFBcastEnd(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
287     PetscCall(PetscFree(my_donor_indices));
288     // Modify our leafs so they point to donor faces instead of donor minz. Additionally, give them indices as faces.
289     for (PetscCount i = 0; i < num_faces; i++) leaf[i].index = donor_indices[i];
290     PetscCall(PetscFree(donor_indices));
291     PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
292     PetscCall(PetscSFSetGraph(face_sfs[num_directions], pEnd - pStart, inum_faces, faces, PETSC_COPY_VALUES, leaf, PETSC_OWN_POINTER));
293     {
294       char face_sf_name[PETSC_MAX_PATH_LEN];
295       PetscCall(PetscSNPrintf(face_sf_name, sizeof face_sf_name, "Z-order Isoperiodic Faces #%" PetscInt_FMT, num_directions));
296       PetscCall(PetscObjectSetName((PetscObject)face_sfs[num_directions], face_sf_name));
297     }
298 
299     transforms[num_directions][0][0]         = 1;
300     transforms[num_directions][1][1]         = 1;
301     transforms[num_directions][2][2]         = 1;
302     transforms[num_directions][3][3]         = 1;
303     transforms[num_directions][direction][3] = upper[direction] - lower[direction];
304     num_directions++;
305   }
306 
307   PetscCall(DMPlexSetIsoperiodicFaceSF(dm, num_directions, face_sfs));
308   PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, num_directions, (PetscScalar *)transforms));
309 
310   for (PetscInt i = 0; i < num_directions; i++) PetscCall(PetscSFDestroy(&face_sfs[i]));
311   PetscFunctionReturn(PETSC_SUCCESS);
312 }
313 
314 // This is a DMGlobalToLocalHook that applies the affine offsets. When extended for rotated periodicity, it'll need to
315 // apply a rotatonal transform and similar operations will be needed for fields (e.g., to rotate a velocity vector).
316 // We use this crude approach here so we don't have to write new GPU kernels yet.
317 static PetscErrorCode DMCoordAddPeriodicOffsets_Private(DM dm, Vec g, InsertMode mode, Vec l, void *ctx)
318 {
319   PetscFunctionBegin;
320   // These `VecScatter`s should be merged to improve efficiency; the scatters cannot be overlapped.
321   for (PetscInt i = 0; i < dm->periodic.num_affines; i++) {
322     PetscCall(VecScatterBegin(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD));
323     PetscCall(VecScatterEnd(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD));
324   }
325   PetscFunctionReturn(PETSC_SUCCESS);
326 }
327 
328 // Start with an SF for a positive depth (e.g., faces) and create a new SF for matched closure. The caller must ensure
329 // that both the donor (root) face and the periodic (leaf) face have consistent orientation, meaning that their closures
330 // are isomorphic. It may be useful/necessary for this restriction to be loosened.
331 //
332 // Output Arguments:
333 //
334 // + closure_sf - augmented point SF (see `DMGetPointSF()`) that includes the faces and all points in its closure. This
335 //   can be used to create a global section and section SF.
336 // - is_points - array of index sets for just the points in the closure of `face_sf`. These may be used to apply an affine
337 //   transformation to periodic dofs; see DMPeriodicCoordinateSetUp_Internal().
338 //
339 static PetscErrorCode DMPlexCreateIsoperiodicPointSF_Private(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs, PetscSF *closure_sf, IS **is_points)
340 {
341   MPI_Comm           comm;
342   PetscMPIInt        rank;
343   PetscSF            point_sf;
344   PetscInt           nroots, nleaves;
345   const PetscInt    *filocal;
346   const PetscSFNode *firemote;
347 
348   PetscFunctionBegin;
349   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
350   PetscCallMPI(MPI_Comm_rank(comm, &rank));
351   PetscCall(DMGetPointSF(dm, &point_sf)); // Point SF has remote points
352   PetscCall(PetscMalloc1(num_face_sfs, is_points));
353 
354   for (PetscInt f = 0; f < num_face_sfs; f++) {
355     PetscSF   face_sf = face_sfs[f];
356     PetscInt *rootdata, *leafdata;
357 
358     PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, &firemote));
359     PetscCall(PetscCalloc2(2 * nroots, &rootdata, 2 * nroots, &leafdata));
360     for (PetscInt i = 0; i < nleaves; i++) {
361       PetscInt point = filocal[i], cl_size, *closure = NULL;
362       PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
363       leafdata[point] = cl_size - 1;
364       PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
365     }
366     PetscCall(PetscSFReduceBegin(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
367     PetscCall(PetscSFReduceEnd(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
368 
369     PetscInt root_offset = 0;
370     for (PetscInt p = 0; p < nroots; p++) {
371       const PetscInt *donor_dof = rootdata + nroots;
372       if (donor_dof[p] == 0) {
373         rootdata[2 * p]     = -1;
374         rootdata[2 * p + 1] = -1;
375         continue;
376       }
377       PetscInt  cl_size;
378       PetscInt *closure = NULL;
379       PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
380       // cl_size - 1 = points not including self
381       PetscAssert(donor_dof[p] == cl_size - 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Reduced leaf cone sizes do not match root cone sizes");
382       rootdata[2 * p]     = root_offset;
383       rootdata[2 * p + 1] = cl_size - 1;
384       root_offset += cl_size - 1;
385       PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
386     }
387     PetscCall(PetscSFBcastBegin(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
388     PetscCall(PetscSFBcastEnd(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
389     // Count how many leaves we need to communicate the closures
390     PetscInt leaf_offset = 0;
391     for (PetscInt i = 0; i < nleaves; i++) {
392       PetscInt point = filocal[i];
393       if (leafdata[2 * point + 1] < 0) continue;
394       leaf_offset += leafdata[2 * point + 1];
395     }
396 
397     PetscSFNode *closure_leaf;
398     PetscCall(PetscMalloc1(leaf_offset, &closure_leaf));
399     leaf_offset = 0;
400     for (PetscInt i = 0; i < nleaves; i++) {
401       PetscInt point   = filocal[i];
402       PetscInt cl_size = leafdata[2 * point + 1];
403       if (cl_size < 0) continue;
404       for (PetscInt j = 0; j < cl_size; j++) {
405         closure_leaf[leaf_offset].rank  = firemote[i].rank;
406         closure_leaf[leaf_offset].index = leafdata[2 * point] + j;
407         leaf_offset++;
408       }
409     }
410 
411     PetscSF sf_closure;
412     PetscCall(PetscSFCreate(comm, &sf_closure));
413     PetscCall(PetscSFSetGraph(sf_closure, root_offset, leaf_offset, NULL, PETSC_USE_POINTER, closure_leaf, PETSC_OWN_POINTER));
414 
415     PetscSFNode *leaf_donor_closure;
416     { // Pack root buffer with owner for every point in the root cones
417       PetscSFNode       *donor_closure;
418       const PetscInt    *pilocal;
419       const PetscSFNode *piremote;
420       PetscInt           npoints;
421 
422       PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, &pilocal, &piremote));
423       PetscCall(PetscCalloc1(root_offset, &donor_closure));
424       root_offset = 0;
425       for (PetscInt p = 0; p < nroots; p++) {
426         if (rootdata[2 * p] < 0) continue;
427         PetscInt  cl_size;
428         PetscInt *closure = NULL;
429         PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
430         for (PetscInt j = 1; j < cl_size; j++) {
431           PetscInt c = closure[2 * j];
432           if (pilocal) {
433             PetscInt found = -1;
434             if (npoints > 0) PetscCall(PetscFindInt(c, npoints, pilocal, &found));
435             if (found >= 0) {
436               donor_closure[root_offset++] = piremote[found];
437               continue;
438             }
439           }
440           // we own c
441           donor_closure[root_offset].rank  = rank;
442           donor_closure[root_offset].index = c;
443           root_offset++;
444         }
445         PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
446       }
447 
448       PetscCall(PetscMalloc1(leaf_offset, &leaf_donor_closure));
449       PetscCall(PetscSFBcastBegin(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE));
450       PetscCall(PetscSFBcastEnd(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE));
451       PetscCall(PetscSFDestroy(&sf_closure));
452       PetscCall(PetscFree(donor_closure));
453     }
454 
455     PetscSFNode *new_iremote;
456     PetscCall(PetscCalloc1(nroots, &new_iremote));
457     for (PetscInt i = 0; i < nroots; i++) new_iremote[i].rank = -1;
458     // Walk leaves and match vertices
459     leaf_offset = 0;
460     for (PetscInt i = 0; i < nleaves; i++) {
461       PetscInt  point   = filocal[i], cl_size;
462       PetscInt *closure = NULL;
463       PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
464       for (PetscInt j = 1; j < cl_size; j++) { // TODO: should we send donor edge orientations so we can flip for consistency?
465         PetscInt    c  = closure[2 * j];
466         PetscSFNode lc = leaf_donor_closure[leaf_offset];
467         // printf("[%d] face %d.%d: %d ?-- (%d,%d)\n", rank, point, j, c, lc.rank, lc.index);
468         if (new_iremote[c].rank == -1) {
469           new_iremote[c] = lc;
470         } else PetscCheck(new_iremote[c].rank == lc.rank && new_iremote[c].index == lc.index, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Mismatched cone ordering between faces");
471         leaf_offset++;
472       }
473       PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
474     }
475     PetscCall(PetscFree(leaf_donor_closure));
476 
477     // Include face points in closure SF
478     for (PetscInt i = 0; i < nleaves; i++) new_iremote[filocal[i]] = firemote[i];
479     // consolidate leaves
480     PetscInt num_new_leaves = 0;
481     for (PetscInt i = 0; i < nroots; i++) {
482       if (new_iremote[i].rank == -1) continue;
483       new_iremote[num_new_leaves] = new_iremote[i];
484       leafdata[num_new_leaves]    = i;
485       num_new_leaves++;
486     }
487     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, num_new_leaves, leafdata, PETSC_COPY_VALUES, &(*is_points)[f]));
488 
489     PetscSF csf;
490     PetscCall(PetscSFCreate(comm, &csf));
491     PetscCall(PetscSFSetGraph(csf, nroots, num_new_leaves, leafdata, PETSC_COPY_VALUES, new_iremote, PETSC_COPY_VALUES));
492     PetscCall(PetscFree(new_iremote)); // copy and delete because new_iremote is longer than it needs to be
493     PetscCall(PetscFree2(rootdata, leafdata));
494 
495     PetscInt npoints;
496     PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, NULL, NULL));
497     if (npoints < 0) { // empty point_sf
498       *closure_sf = csf;
499     } else {
500       PetscCall(PetscSFMerge(point_sf, csf, closure_sf));
501       PetscCall(PetscSFDestroy(&csf));
502     }
503     if (f > 0) PetscCall(PetscSFDestroy(&point_sf)); // Only destroy if point_sf is from previous calls to PetscSFMerge
504     point_sf = *closure_sf;                          // Use combined point + isoperiodic SF to define point ownership for further face_sf
505   }
506   PetscCall(PetscObjectSetName((PetscObject)*closure_sf, "Composed Periodic Points"));
507   PetscFunctionReturn(PETSC_SUCCESS);
508 }
509 
510 static PetscErrorCode DMGetIsoperiodicPointSF_Plex(DM dm, PetscSF *sf)
511 {
512   DM_Plex *plex = (DM_Plex *)dm->data;
513 
514   PetscFunctionBegin;
515   if (!plex->periodic.composed_sf) PetscCall(DMPlexCreateIsoperiodicPointSF_Private(dm, plex->periodic.num_face_sfs, plex->periodic.face_sfs, &plex->periodic.composed_sf, &plex->periodic.periodic_points));
516   if (sf) *sf = plex->periodic.composed_sf;
517   PetscFunctionReturn(PETSC_SUCCESS);
518 }
519 
520 PetscErrorCode DMPlexMigrateIsoperiodicFaceSF_Internal(DM old_dm, DM dm, PetscSF sf_migration)
521 {
522   DM_Plex    *plex = (DM_Plex *)old_dm->data;
523   PetscSF     sf_point, *new_face_sfs;
524   PetscMPIInt rank;
525 
526   PetscFunctionBegin;
527   if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
528   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
529   PetscCall(DMGetPointSF(dm, &sf_point));
530   PetscCall(PetscMalloc1(plex->periodic.num_face_sfs, &new_face_sfs));
531 
532   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) {
533     PetscInt           old_npoints, new_npoints, old_nleaf, new_nleaf, point_nleaf;
534     PetscSFNode       *new_leafdata, *rootdata, *leafdata;
535     const PetscInt    *old_local, *point_local;
536     const PetscSFNode *old_remote, *point_remote;
537 
538     PetscCall(PetscSFGetGraph(plex->periodic.face_sfs[f], &old_npoints, &old_nleaf, &old_local, &old_remote));
539     PetscCall(PetscSFGetGraph(sf_migration, NULL, &new_nleaf, NULL, NULL));
540     PetscCall(PetscSFGetGraph(sf_point, &new_npoints, &point_nleaf, &point_local, &point_remote));
541     PetscAssert(new_nleaf == new_npoints, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected migration leaf space to match new point root space");
542     PetscCall(PetscMalloc3(old_npoints, &rootdata, old_npoints, &leafdata, new_npoints, &new_leafdata));
543 
544     // Fill new_leafdata with new owners of all points
545     for (PetscInt i = 0; i < new_npoints; i++) {
546       new_leafdata[i].rank  = rank;
547       new_leafdata[i].index = i;
548     }
549     for (PetscInt i = 0; i < point_nleaf; i++) {
550       PetscInt j      = point_local[i];
551       new_leafdata[j] = point_remote[i];
552     }
553     // REPLACE is okay because every leaf agrees about the new owners
554     PetscCall(PetscSFReduceBegin(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE));
555     PetscCall(PetscSFReduceEnd(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE));
556     // rootdata now contains the new owners
557 
558     // Send to leaves of old space
559     for (PetscInt i = 0; i < old_npoints; i++) {
560       leafdata[i].rank  = -1;
561       leafdata[i].index = -1;
562     }
563     PetscCall(PetscSFBcastBegin(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE));
564     PetscCall(PetscSFBcastEnd(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE));
565 
566     // Send to new leaf space
567     PetscCall(PetscSFBcastBegin(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE));
568     PetscCall(PetscSFBcastEnd(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE));
569 
570     PetscInt     nface = 0, *new_local;
571     PetscSFNode *new_remote;
572     for (PetscInt i = 0; i < new_npoints; i++) nface += (new_leafdata[i].rank >= 0);
573     PetscCall(PetscMalloc1(nface, &new_local));
574     PetscCall(PetscMalloc1(nface, &new_remote));
575     nface = 0;
576     for (PetscInt i = 0; i < new_npoints; i++) {
577       if (new_leafdata[i].rank == -1) continue;
578       new_local[nface]  = i;
579       new_remote[nface] = new_leafdata[i];
580       nface++;
581     }
582     PetscCall(PetscFree3(rootdata, leafdata, new_leafdata));
583     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &new_face_sfs[f]));
584     PetscCall(PetscSFSetGraph(new_face_sfs[f], new_npoints, nface, new_local, PETSC_OWN_POINTER, new_remote, PETSC_OWN_POINTER));
585     {
586       char new_face_sf_name[PETSC_MAX_PATH_LEN];
587       PetscCall(PetscSNPrintf(new_face_sf_name, sizeof new_face_sf_name, "Migrated Isoperiodic Faces #%" PetscInt_FMT, f));
588       PetscCall(PetscObjectSetName((PetscObject)new_face_sfs[f], new_face_sf_name));
589     }
590   }
591 
592   PetscCall(DMPlexSetIsoperiodicFaceSF(dm, plex->periodic.num_face_sfs, new_face_sfs));
593   PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, plex->periodic.num_face_sfs, (PetscScalar *)plex->periodic.transform));
594   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) PetscCall(PetscSFDestroy(&new_face_sfs[f]));
595   PetscCall(PetscFree(new_face_sfs));
596   PetscFunctionReturn(PETSC_SUCCESS);
597 }
598 
599 PetscErrorCode DMPeriodicCoordinateSetUp_Internal(DM dm)
600 {
601   DM_Plex   *plex = (DM_Plex *)dm->data;
602   PetscCount count;
603   IS         isdof;
604   PetscInt   dim;
605 
606   PetscFunctionBegin;
607   if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
608   PetscCall(DMGetIsoperiodicPointSF_Plex(dm, NULL));
609   PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex));
610 
611   PetscCall(DMGetDimension(dm, &dim));
612   dm->periodic.num_affines = plex->periodic.num_face_sfs;
613   PetscCall(PetscMalloc2(dm->periodic.num_affines, &dm->periodic.affine_to_local, dm->periodic.num_affines, &dm->periodic.affine));
614 
615   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) {
616     PetscInt        npoints, vsize, isize;
617     const PetscInt *points;
618     IS              is = plex->periodic.periodic_points[f];
619     PetscSegBuffer  seg;
620     PetscSection    section;
621     PetscInt       *ind;
622     Vec             L, P;
623     VecType         vec_type;
624     VecScatter      scatter;
625     PetscScalar    *x;
626 
627     PetscCall(DMGetLocalSection(dm, &section));
628     PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 32, &seg));
629     PetscCall(ISGetSize(is, &npoints));
630     PetscCall(ISGetIndices(is, &points));
631     for (PetscInt i = 0; i < npoints; i++) {
632       PetscInt point = points[i], off, dof;
633 
634       PetscCall(PetscSectionGetOffset(section, point, &off));
635       PetscCall(PetscSectionGetDof(section, point, &dof));
636       PetscAssert(dof % dim == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected dof %" PetscInt_FMT " not divisible by dimension %" PetscInt_FMT, dof, dim);
637       for (PetscInt j = 0; j < dof / dim; j++) {
638         PetscInt *slot;
639 
640         PetscCall(PetscSegBufferGetInts(seg, 1, &slot));
641         *slot = off / dim + j;
642       }
643     }
644     PetscCall(PetscSegBufferGetSize(seg, &count));
645     PetscCall(PetscSegBufferExtractAlloc(seg, &ind));
646     PetscCall(PetscSegBufferDestroy(&seg));
647     PetscCall(PetscIntCast(count, &isize));
648     PetscCall(ISCreateBlock(PETSC_COMM_SELF, dim, isize, ind, PETSC_OWN_POINTER, &isdof));
649 
650     PetscCall(PetscIntCast(count * dim, &vsize));
651     PetscCall(DMGetLocalVector(dm, &L));
652     PetscCall(VecCreate(PETSC_COMM_SELF, &P));
653     PetscCall(VecSetSizes(P, vsize, vsize));
654     PetscCall(VecGetType(L, &vec_type));
655     PetscCall(VecSetType(P, vec_type));
656     PetscCall(VecScatterCreate(P, NULL, L, isdof, &scatter));
657     PetscCall(DMRestoreLocalVector(dm, &L));
658     PetscCall(ISDestroy(&isdof));
659 
660     PetscCall(VecGetArrayWrite(P, &x));
661     for (PetscCount i = 0; i < count; i++) {
662       for (PetscInt j = 0; j < dim; j++) x[i * dim + j] = plex->periodic.transform[f][j][3];
663     }
664     PetscCall(VecRestoreArrayWrite(P, &x));
665 
666     dm->periodic.affine_to_local[f] = scatter;
667     dm->periodic.affine[f]          = P;
668   }
669   PetscCall(DMGlobalToLocalHookAdd(dm, NULL, DMCoordAddPeriodicOffsets_Private, NULL));
670   PetscFunctionReturn(PETSC_SUCCESS);
671 }
672 
673 // We'll just orient all the edges, though only periodic boundary edges need orientation
674 static PetscErrorCode DMPlexOrientPositiveEdges_Private(DM dm)
675 {
676   PetscInt dim, eStart, eEnd;
677 
678   PetscFunctionBegin;
679   PetscCall(DMGetDimension(dm, &dim));
680   if (dim < 3) PetscFunctionReturn(PETSC_SUCCESS); // not necessary
681   PetscCall(DMPlexGetDepthStratum(dm, 1, &eStart, &eEnd));
682   for (PetscInt e = eStart; e < eEnd; e++) {
683     const PetscInt *cone;
684     PetscCall(DMPlexGetCone(dm, e, &cone));
685     if (cone[0] > cone[1]) PetscCall(DMPlexOrientPoint(dm, e, -1));
686   }
687   PetscFunctionReturn(PETSC_SUCCESS);
688 }
689 
690 PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Internal(DM dm, PetscInt dim, const PetscInt faces[], const PetscReal lower[], const PetscReal upper[], const DMBoundaryType periodicity[], PetscBool interpolate)
691 {
692   PetscInt  eextent[3] = {1, 1, 1}, vextent[3] = {1, 1, 1};
693   const Ijk closure_1[] = {
694     {0, 0, 0},
695     {1, 0, 0},
696   };
697   const Ijk closure_2[] = {
698     {0, 0, 0},
699     {1, 0, 0},
700     {1, 1, 0},
701     {0, 1, 0},
702   };
703   const Ijk closure_3[] = {
704     {0, 0, 0},
705     {0, 1, 0},
706     {1, 1, 0},
707     {1, 0, 0},
708     {0, 0, 1},
709     {1, 0, 1},
710     {1, 1, 1},
711     {0, 1, 1},
712   };
713   const Ijk *const closure_dim[] = {NULL, closure_1, closure_2, closure_3};
714   // This must be kept consistent with DMPlexCreateCubeMesh_Internal
715   const PetscInt        face_marker_1[]   = {1, 2};
716   const PetscInt        face_marker_2[]   = {4, 2, 1, 3};
717   const PetscInt        face_marker_3[]   = {6, 5, 3, 4, 1, 2};
718   const PetscInt *const face_marker_dim[] = {NULL, face_marker_1, face_marker_2, face_marker_3};
719   // Orient faces so the normal is in the positive axis and the first vertex is the one closest to zero.
720   // These orientations can be determined by examining cones of a reference quad and hex element.
721   const PetscInt        face_orient_1[]   = {0, 0};
722   const PetscInt        face_orient_2[]   = {-1, 0, 0, -1};
723   const PetscInt        face_orient_3[]   = {-2, 0, -2, 1, -2, 0};
724   const PetscInt *const face_orient_dim[] = {NULL, face_orient_1, face_orient_2, face_orient_3};
725 
726   PetscFunctionBegin;
727   PetscAssertPointer(dm, 1);
728   PetscValidLogicalCollectiveInt(dm, dim, 2);
729   PetscCall(DMSetDimension(dm, dim));
730   PetscMPIInt rank, size;
731   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size));
732   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
733   for (PetscInt i = 0; i < dim; i++) {
734     eextent[i] = faces[i];
735     vextent[i] = faces[i] + 1;
736   }
737   ZLayout layout;
738   PetscCall(ZLayoutCreate(size, eextent, vextent, &layout));
739   PetscZSet vset; // set of all vertices in the closure of the owned elements
740   PetscCall(PetscZSetCreate(&vset));
741   PetscInt local_elems = 0;
742   for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
743     Ijk loc = ZCodeSplit(z);
744     if (IjkActive(layout.vextent, loc)) PetscCall(PetscZSetAdd(vset, z));
745     else {
746       z += ZStepOct(z);
747       continue;
748     }
749     if (IjkActive(layout.eextent, loc)) {
750       local_elems++;
751       // Add all neighboring vertices to set
752       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
753         Ijk   inc  = closure_dim[dim][n];
754         Ijk   nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
755         ZCode v    = ZEncode(nloc);
756         PetscCall(PetscZSetAdd(vset, v));
757       }
758     }
759   }
760   PetscInt local_verts, off = 0;
761   ZCode   *vert_z;
762   PetscCall(PetscZSetGetSize(vset, &local_verts));
763   PetscCall(PetscMalloc1(local_verts, &vert_z));
764   PetscCall(PetscZSetGetElems(vset, &off, vert_z));
765   PetscCall(PetscZSetDestroy(&vset));
766   // ZCode is unsigned for bitwise convenience, but highest bit should never be set, so can interpret as signed
767   PetscCall(PetscSortInt64(local_verts, (PetscInt64 *)vert_z));
768 
769   PetscCall(DMPlexSetChart(dm, 0, local_elems + local_verts));
770   for (PetscInt e = 0; e < local_elems; e++) PetscCall(DMPlexSetConeSize(dm, e, PetscPowInt(2, dim)));
771   PetscCall(DMSetUp(dm));
772   {
773     PetscInt e = 0;
774     for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
775       Ijk loc = ZCodeSplit(z);
776       if (!IjkActive(layout.eextent, loc)) {
777         z += ZStepOct(z);
778         continue;
779       }
780       PetscInt cone[8], orient[8] = {0};
781       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
782         Ijk      inc  = closure_dim[dim][n];
783         Ijk      nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
784         ZCode    v    = ZEncode(nloc);
785         PetscInt ci   = ZCodeFind(v, local_verts, vert_z);
786         PetscAssert(ci >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Did not find neighbor vertex in set");
787         cone[n] = local_elems + ci;
788       }
789       PetscCall(DMPlexSetCone(dm, e, cone));
790       PetscCall(DMPlexSetConeOrientation(dm, e, orient));
791       e++;
792     }
793   }
794 
795   PetscCall(DMPlexSymmetrize(dm));
796   PetscCall(DMPlexStratify(dm));
797 
798   { // Create point SF
799     PetscSF sf;
800     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &sf));
801     PetscInt owned_verts = ZCodeFind(layout.zstarts[rank + 1], local_verts, vert_z);
802     if (owned_verts < 0) owned_verts = -(owned_verts + 1); // We don't care whether the key was found
803     PetscInt     num_ghosts = local_verts - owned_verts;   // Due to sorting, owned vertices always come first
804     PetscInt    *local_ghosts;
805     PetscSFNode *ghosts;
806     PetscCall(PetscMalloc1(num_ghosts, &local_ghosts));
807     PetscCall(PetscMalloc1(num_ghosts, &ghosts));
808     for (PetscInt i = 0; i < num_ghosts;) {
809       ZCode       z           = vert_z[owned_verts + i];
810       PetscMPIInt remote_rank = (PetscMPIInt)ZCodeFind(z, size + 1, layout.zstarts), remote_count = 0;
811       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
812       // We have a new remote rank; find all the ghost indices (which are contiguous in vert_z)
813 
814       // Count the elements on remote_rank
815       PetscInt remote_elem = ZLayoutElementsOnRank(&layout, remote_rank);
816 
817       // Traverse vertices and make ghost links
818       for (ZCode rz = layout.zstarts[remote_rank]; rz < layout.zstarts[remote_rank + 1]; rz++) {
819         Ijk loc = ZCodeSplit(rz);
820         if (rz == z) {
821           local_ghosts[i] = local_elems + owned_verts + i;
822           ghosts[i].rank  = remote_rank;
823           ghosts[i].index = remote_elem + remote_count;
824           i++;
825           if (i == num_ghosts) break;
826           z = vert_z[owned_verts + i];
827         }
828         if (IjkActive(layout.vextent, loc)) remote_count++;
829         else rz += ZStepOct(rz);
830       }
831     }
832     PetscCall(PetscSFSetGraph(sf, local_elems + local_verts, num_ghosts, local_ghosts, PETSC_OWN_POINTER, ghosts, PETSC_OWN_POINTER));
833     PetscCall(PetscObjectSetName((PetscObject)sf, "SFC Point SF"));
834     PetscCall(DMSetPointSF(dm, sf));
835     PetscCall(PetscSFDestroy(&sf));
836   }
837   {
838     Vec          coordinates;
839     PetscScalar *coords;
840     PetscSection coord_section;
841     PetscInt     coord_size;
842     PetscCall(DMGetCoordinateSection(dm, &coord_section));
843     PetscCall(PetscSectionSetNumFields(coord_section, 1));
844     PetscCall(PetscSectionSetFieldComponents(coord_section, 0, dim));
845     PetscCall(PetscSectionSetChart(coord_section, local_elems, local_elems + local_verts));
846     for (PetscInt v = 0; v < local_verts; v++) {
847       PetscInt point = local_elems + v;
848       PetscCall(PetscSectionSetDof(coord_section, point, dim));
849       PetscCall(PetscSectionSetFieldDof(coord_section, point, 0, dim));
850     }
851     PetscCall(PetscSectionSetUp(coord_section));
852     PetscCall(PetscSectionGetStorageSize(coord_section, &coord_size));
853     PetscCall(VecCreate(PETSC_COMM_SELF, &coordinates));
854     PetscCall(PetscObjectSetName((PetscObject)coordinates, "coordinates"));
855     PetscCall(VecSetSizes(coordinates, coord_size, PETSC_DETERMINE));
856     PetscCall(VecSetBlockSize(coordinates, dim));
857     PetscCall(VecSetType(coordinates, VECSTANDARD));
858     PetscCall(VecGetArray(coordinates, &coords));
859     for (PetscInt v = 0; v < local_verts; v++) {
860       Ijk loc             = ZCodeSplit(vert_z[v]);
861       coords[v * dim + 0] = lower[0] + loc.i * (upper[0] - lower[0]) / layout.eextent.i;
862       if (dim > 1) coords[v * dim + 1] = lower[1] + loc.j * (upper[1] - lower[1]) / layout.eextent.j;
863       if (dim > 2) coords[v * dim + 2] = lower[2] + loc.k * (upper[2] - lower[2]) / layout.eextent.k;
864     }
865     PetscCall(VecRestoreArray(coordinates, &coords));
866     PetscCall(DMSetCoordinatesLocal(dm, coordinates));
867     PetscCall(VecDestroy(&coordinates));
868   }
869   if (interpolate) {
870     PetscCall(DMPlexInterpolateInPlace_Internal(dm));
871     // It's currently necessary to orient the donor and periodic edges consistently. An easy way to ensure that is ot
872     // give all edges positive orientation. Since vertices are created in Z-order, all ranks will agree about the
873     // ordering cone[0] < cone[1]. This is overkill and it would be nice to remove this preparation and make
874     // DMPlexCreateIsoperiodicClosureSF_Private() more resilient, so it fixes any inconsistent orientations. That might
875     // be needed in a general CGNS reader, for example.
876     PetscCall(DMPlexOrientPositiveEdges_Private(dm));
877 
878     DMLabel label;
879     PetscCall(DMCreateLabel(dm, "Face Sets"));
880     PetscCall(DMGetLabel(dm, "Face Sets", &label));
881     PetscSegBuffer per_faces[3], donor_face_closure[3], my_donor_faces[3];
882     for (PetscInt i = 0; i < 3; i++) {
883       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &per_faces[i]));
884       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &my_donor_faces[i]));
885       PetscCall(PetscSegBufferCreate(sizeof(ZCode), 64 * PetscPowInt(2, dim), &donor_face_closure[i]));
886     }
887     PetscInt fStart, fEnd, vStart, vEnd;
888     PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
889     PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
890     for (PetscInt f = fStart; f < fEnd; f++) {
891       PetscInt npoints, *points = NULL, num_fverts = 0, fverts[8];
892       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
893       PetscInt bc_count[6] = {0};
894       for (PetscInt i = 0; i < npoints; i++) {
895         PetscInt p = points[2 * i];
896         if (p < vStart || vEnd <= p) continue;
897         fverts[num_fverts++] = p;
898         Ijk loc              = ZCodeSplit(vert_z[p - vStart]);
899         // Convention here matches DMPlexCreateCubeMesh_Internal
900         bc_count[0] += loc.i == 0;
901         bc_count[1] += loc.i == layout.vextent.i - 1;
902         bc_count[2] += loc.j == 0;
903         bc_count[3] += loc.j == layout.vextent.j - 1;
904         bc_count[4] += loc.k == 0;
905         bc_count[5] += loc.k == layout.vextent.k - 1;
906       }
907       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
908       for (PetscInt bc = 0, bc_match = 0; bc < 2 * dim; bc++) {
909         if (bc_count[bc] == PetscPowInt(2, dim - 1)) {
910           PetscCall(DMPlexOrientPoint(dm, f, face_orient_dim[dim][bc]));
911           if (periodicity[bc / 2] == DM_BOUNDARY_PERIODIC) {
912             PetscInt *put;
913             if (bc % 2 == 0) { // donor face; no label
914               PetscCall(PetscSegBufferGet(my_donor_faces[bc / 2], 1, &put));
915               *put = f;
916             } else { // periodic face
917               PetscCall(PetscSegBufferGet(per_faces[bc / 2], 1, &put));
918               *put = f;
919               ZCode *zput;
920               PetscCall(PetscSegBufferGet(donor_face_closure[bc / 2], num_fverts, &zput));
921               for (PetscInt i = 0; i < num_fverts; i++) {
922                 Ijk loc = ZCodeSplit(vert_z[fverts[i] - vStart]);
923                 switch (bc / 2) {
924                 case 0:
925                   loc.i = 0;
926                   break;
927                 case 1:
928                   loc.j = 0;
929                   break;
930                 case 2:
931                   loc.k = 0;
932                   break;
933                 }
934                 *zput++ = ZEncode(loc);
935               }
936             }
937             continue;
938           }
939           PetscAssert(bc_match == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Face matches multiple face sets");
940           PetscCall(DMLabelSetValue(label, f, face_marker_dim[dim][bc]));
941           bc_match++;
942         }
943       }
944     }
945     // Ensure that the Coordinate DM has our new boundary labels
946     DM cdm;
947     PetscCall(DMGetCoordinateDM(dm, &cdm));
948     PetscCall(DMCopyLabels(dm, cdm, PETSC_COPY_VALUES, PETSC_FALSE, DM_COPY_LABELS_FAIL));
949     if (periodicity[0] == DM_BOUNDARY_PERIODIC || (dim > 1 && periodicity[1] == DM_BOUNDARY_PERIODIC) || (dim > 2 && periodicity[2] == DM_BOUNDARY_PERIODIC)) {
950       PetscCall(DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(dm, &layout, vert_z, per_faces, lower, upper, periodicity, donor_face_closure, my_donor_faces));
951     }
952     for (PetscInt i = 0; i < 3; i++) {
953       PetscCall(PetscSegBufferDestroy(&per_faces[i]));
954       PetscCall(PetscSegBufferDestroy(&donor_face_closure[i]));
955       PetscCall(PetscSegBufferDestroy(&my_donor_faces[i]));
956     }
957   }
958   PetscCall(PetscFree(layout.zstarts));
959   PetscCall(PetscFree(vert_z));
960   PetscFunctionReturn(PETSC_SUCCESS);
961 }
962 
963 /*@
964   DMPlexSetIsoperiodicFaceSF - Express periodicity from an existing mesh
965 
966   Logically Collective
967 
968   Input Parameters:
969 + dm           - The `DMPLEX` on which to set periodicity
970 . num_face_sfs - Number of `PetscSF`s in `face_sfs`
971 - face_sfs     - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.
972 
973   Level: advanced
974 
975   Note:
976   One can use `-dm_plex_shape zbox` to use this mode of periodicity, wherein the periodic points are distinct both globally
977   and locally, but are paired when creating a global dof space.
978 
979 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexGetIsoperiodicFaceSF()`
980 @*/
981 PetscErrorCode DMPlexSetIsoperiodicFaceSF(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs)
982 {
983   DM_Plex *plex = (DM_Plex *)dm->data;
984 
985   PetscFunctionBegin;
986   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
987   if (face_sfs) PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex));
988   if (face_sfs == plex->periodic.face_sfs && num_face_sfs == plex->periodic.num_face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
989 
990   for (PetscInt i = 0; i < num_face_sfs; i++) PetscCall(PetscObjectReference((PetscObject)face_sfs[i]));
991 
992   if (plex->periodic.num_face_sfs > 0) {
993     for (PetscInt i = 0; i < plex->periodic.num_face_sfs; i++) PetscCall(PetscSFDestroy(&plex->periodic.face_sfs[i]));
994     PetscCall(PetscFree(plex->periodic.face_sfs));
995   }
996 
997   plex->periodic.num_face_sfs = num_face_sfs;
998   PetscCall(PetscCalloc1(num_face_sfs, &plex->periodic.face_sfs));
999   for (PetscInt i = 0; i < num_face_sfs; i++) plex->periodic.face_sfs[i] = face_sfs[i];
1000 
1001   DM cdm = dm->coordinates[0].dm; // Can't DMGetCoordinateDM because it automatically creates one
1002   if (cdm) {
1003     PetscCall(DMPlexSetIsoperiodicFaceSF(cdm, num_face_sfs, face_sfs));
1004     if (face_sfs) cdm->periodic.setup = DMPeriodicCoordinateSetUp_Internal;
1005   }
1006   PetscFunctionReturn(PETSC_SUCCESS);
1007 }
1008 
1009 /*@C
1010   DMPlexGetIsoperiodicFaceSF - Obtain periodicity for a mesh
1011 
1012   Logically Collective
1013 
1014   Input Parameter:
1015 . dm - The `DMPLEX` for which to obtain periodic relation
1016 
1017   Output Parameters:
1018 + num_face_sfs - Number of `PetscSF`s in the array
1019 - face_sfs     - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.
1020 
1021   Level: advanced
1022 
1023 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()`
1024 @*/
1025 PetscErrorCode DMPlexGetIsoperiodicFaceSF(DM dm, PetscInt *num_face_sfs, const PetscSF **face_sfs)
1026 {
1027   DM_Plex *plex = (DM_Plex *)dm->data;
1028 
1029   PetscFunctionBegin;
1030   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1031   *face_sfs     = plex->periodic.face_sfs;
1032   *num_face_sfs = plex->periodic.num_face_sfs;
1033   PetscFunctionReturn(PETSC_SUCCESS);
1034 }
1035 
1036 /*@C
1037   DMPlexSetIsoperiodicFaceTransform - set geometric transform from donor to periodic points
1038 
1039   Logically Collective
1040 
1041   Input Parameters:
1042 + dm - `DMPLEX` that has been configured with `DMPlexSetIsoperiodicFaceSF()`
1043 . n  - Number of transforms in array
1044 - t  - Array of 4x4 affine transformation basis.
1045 
1046   Level: advanced
1047 
1048   Notes:
1049   Affine transforms are 4x4 matrices in which the leading 3x3 block expresses a rotation (or identity for no rotation),
1050   the last column contains a translation vector, and the bottom row is all zero except the last entry, which must always
1051   be 1. This representation is common in geometric modeling and allows affine transformations to be composed using
1052   simple matrix multiplication.
1053 
1054   Although the interface accepts a general affine transform, only affine translation is supported at present.
1055 
1056   Developer Notes:
1057   This interface should be replaced by making BasisTransform public, expanding it to support affine representations, and
1058   adding GPU implementations to apply the G2L/L2G transforms.
1059 
1060 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()`
1061 @*/
1062 PetscErrorCode DMPlexSetIsoperiodicFaceTransform(DM dm, PetscInt n, const PetscScalar t[])
1063 {
1064   DM_Plex *plex = (DM_Plex *)dm->data;
1065 
1066   PetscFunctionBegin;
1067   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1068   PetscCheck(n == plex->periodic.num_face_sfs, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Number of transforms (%" PetscInt_FMT ") must equal number of isoperiodc face SFs (%" PetscInt_FMT ")", n, plex->periodic.num_face_sfs);
1069 
1070   PetscCall(PetscMalloc1(n, &plex->periodic.transform));
1071   for (PetscInt i = 0; i < n; i++) {
1072     for (PetscInt j = 0; j < 4; j++) {
1073       for (PetscInt k = 0; k < 4; k++) {
1074         PetscCheck(j != k || t[i * 16 + j * 4 + k] == 1., PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Rotated transforms not supported");
1075         plex->periodic.transform[i][j][k] = t[i * 16 + j * 4 + k];
1076       }
1077     }
1078   }
1079   PetscFunctionReturn(PETSC_SUCCESS);
1080 }
1081