xref: /petsc/src/dm/impls/plex/plexsfc.c (revision 57e69c3534c815b4ffde222cf1dbe7510e4001b5)
1 #include <petsc/private/dmpleximpl.h> /*I   "petscdmplex.h"   I*/
2 #include <petscsf.h>
3 #include <petsc/private/hashset.h>
4 
5 typedef uint64_t ZCode;
6 
7 PETSC_HASH_SET(ZSet, ZCode, PetscHash_UInt64, PetscHashEqual)
8 
9 typedef struct {
10   PetscInt i, j, k;
11 } Ijk;
12 
13 typedef struct {
14   Ijk         eextent;
15   Ijk         vextent;
16   PetscMPIInt comm_size;
17   ZCode      *zstarts;
18 } ZLayout;
19 
20 static unsigned ZCodeSplit1(ZCode z)
21 {
22   z = ((z & 01001001001001001) | ((z >> 2) & 02002002002002002) | ((z >> 4) & 04004004004004004));
23   z = (z | (z >> 6) | (z >> 12)) & 0000000777000000777;
24   z = (z | (z >> 18)) & 0777777;
25   return (unsigned)z;
26 }
27 
28 static ZCode ZEncode1(unsigned t)
29 {
30   ZCode z = t;
31   z       = (z | (z << 18)) & 0777000000777;
32   z       = (z | (z << 6) | (z << 12)) & 07007007007007007;
33   z       = (z | (z << 2) | (z << 4)) & 0111111111111111111;
34   return z;
35 }
36 
37 static Ijk ZCodeSplit(ZCode z)
38 {
39   Ijk c;
40   c.i = ZCodeSplit1(z >> 2);
41   c.j = ZCodeSplit1(z >> 1);
42   c.k = ZCodeSplit1(z >> 0);
43   return c;
44 }
45 
46 static ZCode ZEncode(Ijk c)
47 {
48   ZCode z = (ZEncode1(c.i) << 2) | (ZEncode1(c.j) << 1) | ZEncode1(c.k);
49   return z;
50 }
51 
52 static PetscBool IjkActive(Ijk extent, Ijk l)
53 {
54   if (l.i < extent.i && l.j < extent.j && l.k < extent.k) return PETSC_TRUE;
55   return PETSC_FALSE;
56 }
57 
58 // If z is not the base of an octet (last 3 bits 0), return 0.
59 //
60 // If z is the base of an octet, we recursively grow to the biggest structured octet. This is typically useful when a z
61 // is outside the domain and we wish to skip a (possibly recursively large) octet to find our next interesting point.
62 static ZCode ZStepOct(ZCode z)
63 {
64   if (PetscUnlikely(z == 0)) return 0; // Infinite loop below if z == 0
65   ZCode step = 07;
66   for (; (z & step) == 0; step = (step << 3) | 07) { }
67   return step >> 3;
68 }
69 
70 // Since element/vertex box extents are typically not equal powers of 2, Z codes that lie within the domain are not contiguous.
71 static PetscErrorCode ZLayoutCreate(PetscMPIInt size, const PetscInt eextent[3], const PetscInt vextent[3], ZLayout *layout)
72 {
73   PetscFunctionBegin;
74   layout->eextent.i = eextent[0];
75   layout->eextent.j = eextent[1];
76   layout->eextent.k = eextent[2];
77   layout->vextent.i = vextent[0];
78   layout->vextent.j = vextent[1];
79   layout->vextent.k = vextent[2];
80   layout->comm_size = size;
81   layout->zstarts   = NULL;
82   PetscCall(PetscMalloc1(size + 1, &layout->zstarts));
83 
84   PetscInt total_elems = eextent[0] * eextent[1] * eextent[2];
85   ZCode    z           = 0;
86   layout->zstarts[0]   = 0;
87   // This loop traverses all vertices in the global domain, so is worth making fast. We use ZStepBound
88   for (PetscMPIInt r = 0; r < size; r++) {
89     PetscInt elems_needed = (total_elems / size) + (total_elems % size > r), count;
90     for (count = 0; count < elems_needed; z++) {
91       ZCode skip = ZStepOct(z); // optimistically attempt a longer step
92       for (ZCode s = skip;; s >>= 3) {
93         Ijk trial = ZCodeSplit(z + s);
94         if (IjkActive(layout->eextent, trial)) {
95           while (count + s + 1 > (ZCode)elems_needed) s >>= 3; // Shrink the octet
96           count += s + 1;
97           z += s;
98           break;
99         }
100         if (s == 0) { // the whole skip octet is inactive
101           z += skip;
102           break;
103         }
104       }
105     }
106     // Pick up any extra vertices in the Z ordering before the next rank's first owned element.
107     //
108     // This leads to poorly balanced vertices when eextent is a power of 2, since all the fringe vertices end up
109     // on the last rank. A possible solution is to balance the Z-order vertices independently from the cells, which will
110     // result in a lot of element closures being remote. We could finish marking boundary conditions, then do a round of
111     // vertex ownership smoothing (which would reorder and redistribute vertices without touching element distribution).
112     // Another would be to have an analytic ownership criteria for vertices in the fringe veextent - eextent. This would
113     // complicate the job of identifying an owner and its offset.
114     //
115     // The current recommended approach is to let `-dm_distribute 1` (default) resolve vertex ownership. This is
116     // *mandatory* with isoperiodicity (except in special cases) to remove standed vertices from local spaces. Here's
117     // the issue:
118     //
119     // Consider this partition on rank 0 (left) and rank 1.
120     //
121     //    4 --------  5 -- 14 --10 -- 21 --11
122     //                |          |          |
123     // 7 -- 16 --  8  |          |          |
124     // |           |  3 -------  7 -------  9
125     // |           |             |          |
126     // 4 --------  6 ------ 10   |          |
127     // |           |         |   6 -- 16 -- 8
128     // |           |         |
129     // 3 ---11---  5 --18--  9
130     //
131     // The periodic face SF looks like
132     // [0] Number of roots=21, leaves=1, remote ranks=1
133     // [0] 16 <- (0,11)
134     // [1] Number of roots=22, leaves=2, remote ranks=2
135     // [1] 14 <- (0,18)
136     // [1] 21 <- (1,16)
137     //
138     // In handling face (0,16), rank 0 learns that (0,7) and (0,8) map to (0,3) and (0,5) respectively, thus we won't use
139     // the point SF links to (1,4) and (1,5). Rank 1 learns about the periodic mapping of (1,5) while handling face
140     // (1,14), but never learns that vertex (1,4) has been mapped to (0,3) by face (0,16).
141     //
142     // We can relatively easily inform vertex (1,4) of this mapping, but it stays in rank 1's local space despite not
143     // being in the closure and thus not being contributed to. This would be mostly harmless except that some viewer
144     // routines expect all local points to be somehow significant. It is not easy to analytically remove the (1,4)
145     // vertex because the point SF and isoperiodic face SF would need to be updated to account for removal of the
146     // stranded vertices.
147     for (; z <= ZEncode(layout->vextent); z++) {
148       Ijk loc = ZCodeSplit(z);
149       if (IjkActive(layout->eextent, loc)) break;
150       z += ZStepOct(z);
151     }
152     layout->zstarts[r + 1] = z;
153   }
154   layout->zstarts[size] = ZEncode(layout->vextent);
155   PetscFunctionReturn(PETSC_SUCCESS);
156 }
157 
158 static PetscInt ZLayoutElementsOnRank(const ZLayout *layout, PetscMPIInt rank)
159 {
160   PetscInt remote_elem = 0;
161   for (ZCode rz = layout->zstarts[rank]; rz < layout->zstarts[rank + 1]; rz++) {
162     Ijk loc = ZCodeSplit(rz);
163     if (IjkActive(layout->eextent, loc)) remote_elem++;
164     else rz += ZStepOct(rz);
165   }
166   return remote_elem;
167 }
168 
169 static PetscInt ZCodeFind(ZCode key, PetscInt n, const ZCode X[])
170 {
171   PetscInt lo = 0, hi = n;
172 
173   if (n == 0) return -1;
174   while (hi - lo > 1) {
175     PetscInt mid = lo + (hi - lo) / 2;
176     if (key < X[mid]) hi = mid;
177     else lo = mid;
178   }
179   return key == X[lo] ? lo : -(lo + (key > X[lo]) + 1);
180 }
181 
182 static PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(DM dm, const ZLayout *layout, const ZCode *vert_z, PetscSegBuffer per_faces[3], const PetscReal *lower, const PetscReal *upper, const DMBoundaryType *periodicity, PetscSegBuffer donor_face_closure[3], PetscSegBuffer my_donor_faces[3])
183 {
184   MPI_Comm    comm;
185   PetscInt    dim, vStart, vEnd;
186   PetscMPIInt size;
187   PetscSF     face_sfs[3];
188   PetscScalar transforms[3][4][4] = {{{0}}};
189 
190   PetscFunctionBegin;
191   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
192   PetscCallMPI(MPI_Comm_size(comm, &size));
193   PetscCall(DMGetDimension(dm, &dim));
194   const PetscInt csize = PetscPowInt(2, dim - 1);
195   PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
196 
197   PetscInt num_directions = 0;
198   for (PetscInt direction = 0; direction < dim; direction++) {
199     size_t       num_faces;
200     PetscInt    *faces;
201     ZCode       *donor_verts, *donor_minz;
202     PetscSFNode *leaf;
203 
204     if (periodicity[direction] != DM_BOUNDARY_PERIODIC) continue;
205     PetscCall(PetscSegBufferGetSize(per_faces[direction], &num_faces));
206     PetscCall(PetscSegBufferExtractInPlace(per_faces[direction], &faces));
207     PetscCall(PetscSegBufferExtractInPlace(donor_face_closure[direction], &donor_verts));
208     PetscCall(PetscMalloc1(num_faces, &donor_minz));
209     PetscCall(PetscMalloc1(num_faces, &leaf));
210     for (PetscInt i = 0; i < (PetscInt)num_faces; i++) {
211       ZCode minz = donor_verts[i * csize];
212       for (PetscInt j = 1; j < csize; j++) minz = PetscMin(minz, donor_verts[i * csize + j]);
213       donor_minz[i] = minz;
214     }
215     {
216       PetscBool sorted;
217       PetscCall(PetscSortedInt64(num_faces, (const PetscInt64 *)donor_minz, &sorted));
218       // If a donor vertex were chosen to broker multiple faces, we would have a logic error.
219       // Checking for sorting is a cheap check that there are no duplicates.
220       PetscCheck(sorted, PETSC_COMM_SELF, PETSC_ERR_PLIB, "minz not sorted; possible duplicates not checked");
221     }
222     for (PetscInt i = 0; i < (PetscInt)num_faces;) {
223       ZCode    z           = donor_minz[i];
224       PetscInt remote_rank = ZCodeFind(z, size + 1, layout->zstarts), remote_count = 0;
225       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
226       // Process all the vertices on this rank
227       for (ZCode rz = layout->zstarts[remote_rank]; rz < layout->zstarts[remote_rank + 1]; rz++) {
228         Ijk loc = ZCodeSplit(rz);
229         if (rz == z) {
230           leaf[i].rank  = remote_rank;
231           leaf[i].index = remote_count;
232           i++;
233           if (i == (PetscInt)num_faces) break;
234           z = donor_minz[i];
235         }
236         if (IjkActive(layout->vextent, loc)) remote_count++;
237       }
238     }
239     PetscCall(PetscFree(donor_minz));
240     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &face_sfs[num_directions]));
241     PetscCall(PetscSFSetGraph(face_sfs[num_directions], vEnd - vStart, num_faces, NULL, PETSC_USE_POINTER, leaf, PETSC_USE_POINTER));
242     const PetscInt *my_donor_degree;
243     PetscCall(PetscSFComputeDegreeBegin(face_sfs[num_directions], &my_donor_degree));
244     PetscCall(PetscSFComputeDegreeEnd(face_sfs[num_directions], &my_donor_degree));
245     PetscInt num_multiroots = 0;
246     for (PetscInt i = 0; i < vEnd - vStart; i++) {
247       num_multiroots += my_donor_degree[i];
248       if (my_donor_degree[i] == 0) continue;
249       PetscAssert(my_donor_degree[i] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex has multiple faces");
250     }
251     PetscInt *my_donors, *donor_indices, *my_donor_indices;
252     size_t    num_my_donors;
253     PetscCall(PetscSegBufferGetSize(my_donor_faces[direction], &num_my_donors));
254     PetscCheck((PetscInt)num_my_donors == num_multiroots, PETSC_COMM_SELF, PETSC_ERR_SUP, "Donor request does not match expected donors");
255     PetscCall(PetscSegBufferExtractInPlace(my_donor_faces[direction], &my_donors));
256     PetscCall(PetscMalloc1(vEnd - vStart, &my_donor_indices));
257     for (PetscInt i = 0; i < (PetscInt)num_my_donors; i++) {
258       PetscInt f = my_donors[i];
259       PetscInt num_points, *points = NULL, minv = PETSC_MAX_INT;
260       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
261       for (PetscInt j = 0; j < num_points; j++) {
262         PetscInt p = points[2 * j];
263         if (p < vStart || vEnd <= p) continue;
264         minv = PetscMin(minv, p);
265       }
266       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
267       PetscAssert(my_donor_degree[minv - vStart] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex not requested");
268       my_donor_indices[minv - vStart] = f;
269     }
270     PetscCall(PetscMalloc1(num_faces, &donor_indices));
271     PetscCall(PetscSFBcastBegin(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
272     PetscCall(PetscSFBcastEnd(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
273     PetscCall(PetscFree(my_donor_indices));
274     // Modify our leafs so they point to donor faces instead of donor minz. Additionally, give them indices as faces.
275     for (PetscInt i = 0; i < (PetscInt)num_faces; i++) leaf[i].index = donor_indices[i];
276     PetscCall(PetscFree(donor_indices));
277     PetscInt pStart, pEnd;
278     PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
279     PetscCall(PetscSFSetGraph(face_sfs[num_directions], pEnd - pStart, num_faces, faces, PETSC_COPY_VALUES, leaf, PETSC_OWN_POINTER));
280     {
281       char face_sf_name[PETSC_MAX_PATH_LEN];
282       PetscCall(PetscSNPrintf(face_sf_name, sizeof face_sf_name, "Z-order Isoperiodic Faces #%" PetscInt_FMT, num_directions));
283       PetscCall(PetscObjectSetName((PetscObject)face_sfs[num_directions], face_sf_name));
284     }
285 
286     transforms[num_directions][0][0]         = 1;
287     transforms[num_directions][1][1]         = 1;
288     transforms[num_directions][2][2]         = 1;
289     transforms[num_directions][3][3]         = 1;
290     transforms[num_directions][direction][3] = upper[direction] - lower[direction];
291     num_directions++;
292   }
293 
294   PetscCall(DMPlexSetIsoperiodicFaceSF(dm, num_directions, face_sfs));
295   PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, num_directions, (PetscScalar *)transforms));
296 
297   for (PetscInt i = 0; i < num_directions; i++) PetscCall(PetscSFDestroy(&face_sfs[i]));
298   PetscFunctionReturn(PETSC_SUCCESS);
299 }
300 
301 // This is a DMGlobalToLocalHook that applies the affine offsets. When extended for rotated periodicity, it'll need to
302 // apply a rotatonal transform and similar operations will be needed for fields (e.g., to rotate a velocity vector).
303 // We use this crude approach here so we don't have to write new GPU kernels yet.
304 static PetscErrorCode DMCoordAddPeriodicOffsets_Private(DM dm, Vec g, InsertMode mode, Vec l, void *ctx)
305 {
306   PetscFunctionBegin;
307   // These `VecScatter`s should be merged to improve efficiency; the scatters cannot be overlapped.
308   for (PetscInt i = 0; i < dm->periodic.num_affines; i++) {
309     PetscCall(VecScatterBegin(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD));
310     PetscCall(VecScatterEnd(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD));
311   }
312   PetscFunctionReturn(PETSC_SUCCESS);
313 }
314 
315 // Start with an SF for a positive depth (e.g., faces) and create a new SF for matched closure. The caller must ensure
316 // that both the donor (root) face and the periodic (leaf) face have consistent orientation, meaning that their closures
317 // are isomorphic. It may be useful/necessary for this restriction to be loosened.
318 //
319 // Output Arguments:
320 //
321 // + closure_sf - augmented point SF (see `DMGetPointSF()`) that includes the faces and all points in its closure. This
322 //   can be used to create a global section and section SF.
323 // - is_points - array of index sets for just the points in the closure of `face_sf`. These may be used to apply an affine
324 //   transformation to periodic dofs; see DMPeriodicCoordinateSetUp_Internal().
325 //
326 static PetscErrorCode DMPlexCreateIsoperiodicPointSF_Private(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs, PetscSF *closure_sf, IS **is_points)
327 {
328   MPI_Comm           comm;
329   PetscMPIInt        rank;
330   PetscSF            point_sf;
331   PetscInt           nroots, nleaves;
332   const PetscInt    *filocal;
333   const PetscSFNode *firemote;
334 
335   PetscFunctionBegin;
336   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
337   PetscCallMPI(MPI_Comm_rank(comm, &rank));
338   PetscCall(DMGetPointSF(dm, &point_sf)); // Point SF has remote points
339   PetscCall(PetscMalloc1(num_face_sfs, is_points));
340 
341   for (PetscInt f = 0; f < num_face_sfs; f++) {
342     PetscSF   face_sf = face_sfs[f];
343     PetscInt *rootdata, *leafdata;
344 
345     PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, &firemote));
346     PetscCall(PetscCalloc2(2 * nroots, &rootdata, 2 * nroots, &leafdata));
347     for (PetscInt i = 0; i < nleaves; i++) {
348       PetscInt point = filocal[i], cl_size, *closure = NULL;
349       PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
350       leafdata[point] = cl_size - 1;
351       PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
352     }
353     PetscCall(PetscSFReduceBegin(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
354     PetscCall(PetscSFReduceEnd(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
355 
356     PetscInt root_offset = 0;
357     for (PetscInt p = 0; p < nroots; p++) {
358       const PetscInt *donor_dof = rootdata + nroots;
359       if (donor_dof[p] == 0) {
360         rootdata[2 * p]     = -1;
361         rootdata[2 * p + 1] = -1;
362         continue;
363       }
364       PetscInt  cl_size;
365       PetscInt *closure = NULL;
366       PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
367       // cl_size - 1 = points not including self
368       PetscAssert(donor_dof[p] == cl_size - 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Reduced leaf cone sizes do not match root cone sizes");
369       rootdata[2 * p]     = root_offset;
370       rootdata[2 * p + 1] = cl_size - 1;
371       root_offset += cl_size - 1;
372       PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
373     }
374     PetscCall(PetscSFBcastBegin(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
375     PetscCall(PetscSFBcastEnd(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
376     // Count how many leaves we need to communicate the closures
377     PetscInt leaf_offset = 0;
378     for (PetscInt i = 0; i < nleaves; i++) {
379       PetscInt point = filocal[i];
380       if (leafdata[2 * point + 1] < 0) continue;
381       leaf_offset += leafdata[2 * point + 1];
382     }
383 
384     PetscSFNode *closure_leaf;
385     PetscCall(PetscMalloc1(leaf_offset, &closure_leaf));
386     leaf_offset = 0;
387     for (PetscInt i = 0; i < nleaves; i++) {
388       PetscInt point   = filocal[i];
389       PetscInt cl_size = leafdata[2 * point + 1];
390       if (cl_size < 0) continue;
391       for (PetscInt j = 0; j < cl_size; j++) {
392         closure_leaf[leaf_offset].rank  = firemote[i].rank;
393         closure_leaf[leaf_offset].index = leafdata[2 * point] + j;
394         leaf_offset++;
395       }
396     }
397 
398     PetscSF sf_closure;
399     PetscCall(PetscSFCreate(comm, &sf_closure));
400     PetscCall(PetscSFSetGraph(sf_closure, root_offset, leaf_offset, NULL, PETSC_USE_POINTER, closure_leaf, PETSC_OWN_POINTER));
401 
402     PetscSFNode *leaf_donor_closure;
403     { // Pack root buffer with owner for every point in the root cones
404       PetscSFNode       *donor_closure;
405       const PetscInt    *pilocal;
406       const PetscSFNode *piremote;
407       PetscInt           npoints;
408 
409       PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, &pilocal, &piremote));
410       PetscCall(PetscCalloc1(root_offset, &donor_closure));
411       root_offset = 0;
412       for (PetscInt p = 0; p < nroots; p++) {
413         if (rootdata[2 * p] < 0) continue;
414         PetscInt  cl_size;
415         PetscInt *closure = NULL;
416         PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
417         for (PetscInt j = 1; j < cl_size; j++) {
418           PetscInt c = closure[2 * j];
419           if (pilocal) {
420             PetscInt found = -1;
421             if (npoints > 0) PetscCall(PetscFindInt(c, npoints, pilocal, &found));
422             if (found >= 0) {
423               donor_closure[root_offset++] = piremote[found];
424               continue;
425             }
426           }
427           // we own c
428           donor_closure[root_offset].rank  = rank;
429           donor_closure[root_offset].index = c;
430           root_offset++;
431         }
432         PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
433       }
434 
435       PetscCall(PetscMalloc1(leaf_offset, &leaf_donor_closure));
436       PetscCall(PetscSFBcastBegin(sf_closure, MPIU_2INT, donor_closure, leaf_donor_closure, MPI_REPLACE));
437       PetscCall(PetscSFBcastEnd(sf_closure, MPIU_2INT, donor_closure, leaf_donor_closure, MPI_REPLACE));
438       PetscCall(PetscSFDestroy(&sf_closure));
439       PetscCall(PetscFree(donor_closure));
440     }
441 
442     PetscSFNode *new_iremote;
443     PetscCall(PetscCalloc1(nroots, &new_iremote));
444     for (PetscInt i = 0; i < nroots; i++) new_iremote[i].rank = -1;
445     // Walk leaves and match vertices
446     leaf_offset = 0;
447     for (PetscInt i = 0; i < nleaves; i++) {
448       PetscInt  point   = filocal[i], cl_size;
449       PetscInt *closure = NULL;
450       PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
451       for (PetscInt j = 1; j < cl_size; j++) { // TODO: should we send donor edge orientations so we can flip for consistency?
452         PetscInt    c  = closure[2 * j];
453         PetscSFNode lc = leaf_donor_closure[leaf_offset];
454         // printf("[%d] face %d.%d: %d ?-- (%d,%d)\n", rank, point, j, c, lc.rank, lc.index);
455         if (new_iremote[c].rank == -1) {
456           new_iremote[c] = lc;
457         } else PetscCheck(new_iremote[c].rank == lc.rank && new_iremote[c].index == lc.index, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Mismatched cone ordering between faces");
458         leaf_offset++;
459       }
460       PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
461     }
462     PetscCall(PetscFree(leaf_donor_closure));
463 
464     // Include face points in closure SF
465     for (PetscInt i = 0; i < nleaves; i++) new_iremote[filocal[i]] = firemote[i];
466     // consolidate leaves
467     PetscInt num_new_leaves = 0;
468     for (PetscInt i = 0; i < nroots; i++) {
469       if (new_iremote[i].rank == -1) continue;
470       new_iremote[num_new_leaves] = new_iremote[i];
471       leafdata[num_new_leaves]    = i;
472       num_new_leaves++;
473     }
474     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, num_new_leaves, leafdata, PETSC_COPY_VALUES, &(*is_points)[f]));
475 
476     PetscSF csf;
477     PetscCall(PetscSFCreate(comm, &csf));
478     PetscCall(PetscSFSetGraph(csf, nroots, num_new_leaves, leafdata, PETSC_COPY_VALUES, new_iremote, PETSC_COPY_VALUES));
479     PetscCall(PetscFree(new_iremote)); // copy and delete because new_iremote is longer than it needs to be
480     PetscCall(PetscFree2(rootdata, leafdata));
481 
482     PetscInt npoints;
483     PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, NULL, NULL));
484     if (npoints < 0) { // empty point_sf
485       *closure_sf = csf;
486     } else {
487       PetscCall(PetscSFMerge(point_sf, csf, closure_sf));
488       PetscCall(PetscSFDestroy(&csf));
489     }
490     if (f > 0) PetscCall(PetscSFDestroy(&point_sf)); // Only destroy if point_sf is from previous calls to PetscSFMerge
491     point_sf = *closure_sf;                          // Use combined point + isoperiodic SF to define point ownership for further face_sf
492   }
493   PetscCall(PetscObjectSetName((PetscObject)*closure_sf, "Composed Periodic Points"));
494   PetscFunctionReturn(PETSC_SUCCESS);
495 }
496 
497 static PetscErrorCode DMGetIsoperiodicPointSF_Plex(DM dm, PetscSF *sf)
498 {
499   DM_Plex *plex = (DM_Plex *)dm->data;
500 
501   PetscFunctionBegin;
502   if (!plex->periodic.composed_sf) PetscCall(DMPlexCreateIsoperiodicPointSF_Private(dm, plex->periodic.num_face_sfs, plex->periodic.face_sfs, &plex->periodic.composed_sf, &plex->periodic.periodic_points));
503   if (sf) *sf = plex->periodic.composed_sf;
504   PetscFunctionReturn(PETSC_SUCCESS);
505 }
506 
507 PetscErrorCode DMPlexMigrateIsoperiodicFaceSF_Internal(DM old_dm, DM dm, PetscSF sf_migration)
508 {
509   DM_Plex    *plex = (DM_Plex *)old_dm->data;
510   PetscSF     sf_point, *new_face_sfs;
511   PetscMPIInt rank;
512 
513   PetscFunctionBegin;
514   if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
515   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
516   PetscCall(DMGetPointSF(dm, &sf_point));
517   PetscCall(PetscMalloc1(plex->periodic.num_face_sfs, &new_face_sfs));
518 
519   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) {
520     PetscInt           old_npoints, new_npoints, old_nleaf, new_nleaf, point_nleaf;
521     PetscSFNode       *new_leafdata, *rootdata, *leafdata;
522     const PetscInt    *old_local, *point_local;
523     const PetscSFNode *old_remote, *point_remote;
524     PetscCall(PetscSFGetGraph(plex->periodic.face_sfs[f], &old_npoints, &old_nleaf, &old_local, &old_remote));
525     PetscCall(PetscSFGetGraph(sf_migration, NULL, &new_nleaf, NULL, NULL));
526     PetscCall(PetscSFGetGraph(sf_point, &new_npoints, &point_nleaf, &point_local, &point_remote));
527     PetscAssert(new_nleaf == new_npoints, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected migration leaf space to match new point root space");
528     PetscCall(PetscMalloc3(old_npoints, &rootdata, old_npoints, &leafdata, new_npoints, &new_leafdata));
529 
530     // Fill new_leafdata with new owners of all points
531     for (PetscInt i = 0; i < new_npoints; i++) {
532       new_leafdata[i].rank  = rank;
533       new_leafdata[i].index = i;
534     }
535     for (PetscInt i = 0; i < point_nleaf; i++) {
536       PetscInt j      = point_local[i];
537       new_leafdata[j] = point_remote[i];
538     }
539     // REPLACE is okay because every leaf agrees about the new owners
540     PetscCall(PetscSFReduceBegin(sf_migration, MPIU_2INT, new_leafdata, rootdata, MPI_REPLACE));
541     PetscCall(PetscSFReduceEnd(sf_migration, MPIU_2INT, new_leafdata, rootdata, MPI_REPLACE));
542     // rootdata now contains the new owners
543 
544     // Send to leaves of old space
545     for (PetscInt i = 0; i < old_npoints; i++) {
546       leafdata[i].rank  = -1;
547       leafdata[i].index = -1;
548     }
549     PetscCall(PetscSFBcastBegin(plex->periodic.face_sfs[f], MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
550     PetscCall(PetscSFBcastEnd(plex->periodic.face_sfs[f], MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
551 
552     // Send to new leaf space
553     PetscCall(PetscSFBcastBegin(sf_migration, MPIU_2INT, leafdata, new_leafdata, MPI_REPLACE));
554     PetscCall(PetscSFBcastEnd(sf_migration, MPIU_2INT, leafdata, new_leafdata, MPI_REPLACE));
555 
556     PetscInt     nface = 0, *new_local;
557     PetscSFNode *new_remote;
558     for (PetscInt i = 0; i < new_npoints; i++) nface += (new_leafdata[i].rank >= 0);
559     PetscCall(PetscMalloc1(nface, &new_local));
560     PetscCall(PetscMalloc1(nface, &new_remote));
561     nface = 0;
562     for (PetscInt i = 0; i < new_npoints; i++) {
563       if (new_leafdata[i].rank == -1) continue;
564       new_local[nface]  = i;
565       new_remote[nface] = new_leafdata[i];
566       nface++;
567     }
568     PetscCall(PetscFree3(rootdata, leafdata, new_leafdata));
569     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &new_face_sfs[f]));
570     PetscCall(PetscSFSetGraph(new_face_sfs[f], new_npoints, nface, new_local, PETSC_OWN_POINTER, new_remote, PETSC_OWN_POINTER));
571     {
572       char new_face_sf_name[PETSC_MAX_PATH_LEN];
573       PetscCall(PetscSNPrintf(new_face_sf_name, sizeof new_face_sf_name, "Migrated Isoperiodic Faces #%" PetscInt_FMT, f));
574       PetscCall(PetscObjectSetName((PetscObject)new_face_sfs[f], new_face_sf_name));
575     }
576   }
577 
578   PetscCall(DMPlexSetIsoperiodicFaceSF(dm, plex->periodic.num_face_sfs, new_face_sfs));
579   PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, plex->periodic.num_face_sfs, (PetscScalar *)plex->periodic.transform));
580   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) PetscCall(PetscSFDestroy(&new_face_sfs[f]));
581   PetscCall(PetscFree(new_face_sfs));
582   PetscFunctionReturn(PETSC_SUCCESS);
583 }
584 
585 PetscErrorCode DMPeriodicCoordinateSetUp_Internal(DM dm)
586 {
587   DM_Plex *plex = (DM_Plex *)dm->data;
588   size_t   count;
589   IS       isdof;
590   PetscInt dim;
591 
592   PetscFunctionBegin;
593   if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
594   PetscCall(DMGetIsoperiodicPointSF_Plex(dm, NULL));
595   PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex));
596 
597   PetscCall(DMGetDimension(dm, &dim));
598   dm->periodic.num_affines = plex->periodic.num_face_sfs;
599   PetscCall(PetscMalloc2(dm->periodic.num_affines, &dm->periodic.affine_to_local, dm->periodic.num_affines, &dm->periodic.affine));
600 
601   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) {
602     {
603       PetscInt        npoints;
604       const PetscInt *points;
605       IS              is = plex->periodic.periodic_points[f];
606       PetscSegBuffer  seg;
607       PetscSection    section;
608       PetscCall(DMGetLocalSection(dm, &section));
609       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 32, &seg));
610       PetscCall(ISGetSize(is, &npoints));
611       PetscCall(ISGetIndices(is, &points));
612       for (PetscInt i = 0; i < npoints; i++) {
613         PetscInt point = points[i], off, dof;
614         PetscCall(PetscSectionGetOffset(section, point, &off));
615         PetscCall(PetscSectionGetDof(section, point, &dof));
616         PetscAssert(dof % dim == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected dof %" PetscInt_FMT " not divisible by dimension %" PetscInt_FMT, dof, dim);
617         for (PetscInt j = 0; j < dof / dim; j++) {
618           PetscInt *slot;
619           PetscCall(PetscSegBufferGetInts(seg, 1, &slot));
620           *slot = off / dim + j;
621         }
622       }
623       PetscInt *ind;
624       PetscCall(PetscSegBufferGetSize(seg, &count));
625       PetscCall(PetscSegBufferExtractAlloc(seg, &ind));
626       PetscCall(PetscSegBufferDestroy(&seg));
627       PetscCall(ISCreateBlock(PETSC_COMM_SELF, dim, count, ind, PETSC_OWN_POINTER, &isdof));
628     }
629     Vec        L, P;
630     VecType    vec_type;
631     VecScatter scatter;
632     PetscCall(DMGetLocalVector(dm, &L));
633     PetscCall(VecCreate(PETSC_COMM_SELF, &P));
634     PetscCall(VecSetSizes(P, count * dim, count * dim));
635     PetscCall(VecGetType(L, &vec_type));
636     PetscCall(VecSetType(P, vec_type));
637     PetscCall(VecScatterCreate(P, NULL, L, isdof, &scatter));
638     PetscCall(DMRestoreLocalVector(dm, &L));
639     PetscCall(ISDestroy(&isdof));
640 
641     {
642       PetscScalar *x;
643       PetscCall(VecGetArrayWrite(P, &x));
644       for (PetscInt i = 0; i < (PetscInt)count; i++) {
645         for (PetscInt j = 0; j < dim; j++) x[i * dim + j] = plex->periodic.transform[f][j][3];
646       }
647       PetscCall(VecRestoreArrayWrite(P, &x));
648     }
649 
650     dm->periodic.affine_to_local[f] = scatter;
651     dm->periodic.affine[f]          = P;
652   }
653   PetscCall(DMGlobalToLocalHookAdd(dm, NULL, DMCoordAddPeriodicOffsets_Private, NULL));
654   PetscFunctionReturn(PETSC_SUCCESS);
655 }
656 
657 // We'll just orient all the edges, though only periodic boundary edges need orientation
658 static PetscErrorCode DMPlexOrientPositiveEdges_Private(DM dm)
659 {
660   PetscInt dim, eStart, eEnd;
661 
662   PetscFunctionBegin;
663   PetscCall(DMGetDimension(dm, &dim));
664   if (dim < 3) PetscFunctionReturn(PETSC_SUCCESS); // not necessary
665   PetscCall(DMPlexGetDepthStratum(dm, 1, &eStart, &eEnd));
666   for (PetscInt e = eStart; e < eEnd; e++) {
667     const PetscInt *cone;
668     PetscCall(DMPlexGetCone(dm, e, &cone));
669     if (cone[0] > cone[1]) PetscCall(DMPlexOrientPoint(dm, e, -1));
670   }
671   PetscFunctionReturn(PETSC_SUCCESS);
672 }
673 
674 PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Internal(DM dm, PetscInt dim, const PetscInt faces[], const PetscReal lower[], const PetscReal upper[], const DMBoundaryType periodicity[], PetscBool interpolate)
675 {
676   PetscInt  eextent[3] = {1, 1, 1}, vextent[3] = {1, 1, 1};
677   const Ijk closure_1[] = {
678     {0, 0, 0},
679     {1, 0, 0},
680   };
681   const Ijk closure_2[] = {
682     {0, 0, 0},
683     {1, 0, 0},
684     {1, 1, 0},
685     {0, 1, 0},
686   };
687   const Ijk closure_3[] = {
688     {0, 0, 0},
689     {0, 1, 0},
690     {1, 1, 0},
691     {1, 0, 0},
692     {0, 0, 1},
693     {1, 0, 1},
694     {1, 1, 1},
695     {0, 1, 1},
696   };
697   const Ijk *const closure_dim[] = {NULL, closure_1, closure_2, closure_3};
698   // This must be kept consistent with DMPlexCreateCubeMesh_Internal
699   const PetscInt        face_marker_1[]   = {1, 2};
700   const PetscInt        face_marker_2[]   = {4, 2, 1, 3};
701   const PetscInt        face_marker_3[]   = {6, 5, 3, 4, 1, 2};
702   const PetscInt *const face_marker_dim[] = {NULL, face_marker_1, face_marker_2, face_marker_3};
703   // Orient faces so the normal is in the positive axis and the first vertex is the one closest to zero.
704   // These orientations can be determined by examining cones of a reference quad and hex element.
705   const PetscInt        face_orient_1[]   = {0, 0};
706   const PetscInt        face_orient_2[]   = {-1, 0, 0, -1};
707   const PetscInt        face_orient_3[]   = {-2, 0, -2, 1, -2, 0};
708   const PetscInt *const face_orient_dim[] = {NULL, face_orient_1, face_orient_2, face_orient_3};
709 
710   PetscFunctionBegin;
711   PetscAssertPointer(dm, 1);
712   PetscValidLogicalCollectiveInt(dm, dim, 2);
713   PetscCall(DMSetDimension(dm, dim));
714   PetscMPIInt rank, size;
715   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size));
716   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
717   for (PetscInt i = 0; i < dim; i++) {
718     eextent[i] = faces[i];
719     vextent[i] = faces[i] + 1;
720   }
721   ZLayout layout;
722   PetscCall(ZLayoutCreate(size, eextent, vextent, &layout));
723   PetscZSet vset; // set of all vertices in the closure of the owned elements
724   PetscCall(PetscZSetCreate(&vset));
725   PetscInt local_elems = 0;
726   for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
727     Ijk loc = ZCodeSplit(z);
728     if (IjkActive(layout.vextent, loc)) PetscCall(PetscZSetAdd(vset, z));
729     else {
730       z += ZStepOct(z);
731       continue;
732     }
733     if (IjkActive(layout.eextent, loc)) {
734       local_elems++;
735       // Add all neighboring vertices to set
736       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
737         Ijk   inc  = closure_dim[dim][n];
738         Ijk   nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
739         ZCode v    = ZEncode(nloc);
740         PetscCall(PetscZSetAdd(vset, v));
741       }
742     }
743   }
744   PetscInt local_verts, off = 0;
745   ZCode   *vert_z;
746   PetscCall(PetscZSetGetSize(vset, &local_verts));
747   PetscCall(PetscMalloc1(local_verts, &vert_z));
748   PetscCall(PetscZSetGetElems(vset, &off, vert_z));
749   PetscCall(PetscZSetDestroy(&vset));
750   // ZCode is unsigned for bitwise convenience, but highest bit should never be set, so can interpret as signed
751   PetscCall(PetscSortInt64(local_verts, (PetscInt64 *)vert_z));
752 
753   PetscCall(DMPlexSetChart(dm, 0, local_elems + local_verts));
754   for (PetscInt e = 0; e < local_elems; e++) PetscCall(DMPlexSetConeSize(dm, e, PetscPowInt(2, dim)));
755   PetscCall(DMSetUp(dm));
756   {
757     PetscInt e = 0;
758     for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
759       Ijk loc = ZCodeSplit(z);
760       if (!IjkActive(layout.eextent, loc)) {
761         z += ZStepOct(z);
762         continue;
763       }
764       PetscInt cone[8], orient[8] = {0};
765       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
766         Ijk      inc  = closure_dim[dim][n];
767         Ijk      nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
768         ZCode    v    = ZEncode(nloc);
769         PetscInt ci   = ZCodeFind(v, local_verts, vert_z);
770         PetscAssert(ci >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Did not find neighbor vertex in set");
771         cone[n] = local_elems + ci;
772       }
773       PetscCall(DMPlexSetCone(dm, e, cone));
774       PetscCall(DMPlexSetConeOrientation(dm, e, orient));
775       e++;
776     }
777   }
778 
779   PetscCall(DMPlexSymmetrize(dm));
780   PetscCall(DMPlexStratify(dm));
781 
782   { // Create point SF
783     PetscSF sf;
784     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &sf));
785     PetscInt owned_verts = ZCodeFind(layout.zstarts[rank + 1], local_verts, vert_z);
786     if (owned_verts < 0) owned_verts = -(owned_verts + 1); // We don't care whether the key was found
787     PetscInt     num_ghosts = local_verts - owned_verts;   // Due to sorting, owned vertices always come first
788     PetscInt    *local_ghosts;
789     PetscSFNode *ghosts;
790     PetscCall(PetscMalloc1(num_ghosts, &local_ghosts));
791     PetscCall(PetscMalloc1(num_ghosts, &ghosts));
792     for (PetscInt i = 0; i < num_ghosts;) {
793       ZCode    z           = vert_z[owned_verts + i];
794       PetscInt remote_rank = ZCodeFind(z, size + 1, layout.zstarts), remote_count = 0;
795       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
796       // We have a new remote rank; find all the ghost indices (which are contiguous in vert_z)
797 
798       // Count the elements on remote_rank
799       PetscInt remote_elem = ZLayoutElementsOnRank(&layout, remote_rank);
800 
801       // Traverse vertices and make ghost links
802       for (ZCode rz = layout.zstarts[remote_rank]; rz < layout.zstarts[remote_rank + 1]; rz++) {
803         Ijk loc = ZCodeSplit(rz);
804         if (rz == z) {
805           local_ghosts[i] = local_elems + owned_verts + i;
806           ghosts[i].rank  = remote_rank;
807           ghosts[i].index = remote_elem + remote_count;
808           i++;
809           if (i == num_ghosts) break;
810           z = vert_z[owned_verts + i];
811         }
812         if (IjkActive(layout.vextent, loc)) remote_count++;
813         else rz += ZStepOct(rz);
814       }
815     }
816     PetscCall(PetscSFSetGraph(sf, local_elems + local_verts, num_ghosts, local_ghosts, PETSC_OWN_POINTER, ghosts, PETSC_OWN_POINTER));
817     PetscCall(PetscObjectSetName((PetscObject)sf, "SFC Point SF"));
818     PetscCall(DMSetPointSF(dm, sf));
819     PetscCall(PetscSFDestroy(&sf));
820   }
821   {
822     Vec          coordinates;
823     PetscScalar *coords;
824     PetscSection coord_section;
825     PetscInt     coord_size;
826     PetscCall(DMGetCoordinateSection(dm, &coord_section));
827     PetscCall(PetscSectionSetNumFields(coord_section, 1));
828     PetscCall(PetscSectionSetFieldComponents(coord_section, 0, dim));
829     PetscCall(PetscSectionSetChart(coord_section, local_elems, local_elems + local_verts));
830     for (PetscInt v = 0; v < local_verts; v++) {
831       PetscInt point = local_elems + v;
832       PetscCall(PetscSectionSetDof(coord_section, point, dim));
833       PetscCall(PetscSectionSetFieldDof(coord_section, point, 0, dim));
834     }
835     PetscCall(PetscSectionSetUp(coord_section));
836     PetscCall(PetscSectionGetStorageSize(coord_section, &coord_size));
837     PetscCall(VecCreate(PETSC_COMM_SELF, &coordinates));
838     PetscCall(PetscObjectSetName((PetscObject)coordinates, "coordinates"));
839     PetscCall(VecSetSizes(coordinates, coord_size, PETSC_DETERMINE));
840     PetscCall(VecSetBlockSize(coordinates, dim));
841     PetscCall(VecSetType(coordinates, VECSTANDARD));
842     PetscCall(VecGetArray(coordinates, &coords));
843     for (PetscInt v = 0; v < local_verts; v++) {
844       Ijk loc             = ZCodeSplit(vert_z[v]);
845       coords[v * dim + 0] = lower[0] + loc.i * (upper[0] - lower[0]) / layout.eextent.i;
846       if (dim > 1) coords[v * dim + 1] = lower[1] + loc.j * (upper[1] - lower[1]) / layout.eextent.j;
847       if (dim > 2) coords[v * dim + 2] = lower[2] + loc.k * (upper[2] - lower[2]) / layout.eextent.k;
848     }
849     PetscCall(VecRestoreArray(coordinates, &coords));
850     PetscCall(DMSetCoordinatesLocal(dm, coordinates));
851     PetscCall(VecDestroy(&coordinates));
852   }
853   if (interpolate) {
854     PetscCall(DMPlexInterpolateInPlace_Internal(dm));
855     // It's currently necessary to orient the donor and periodic edges consistently. An easy way to ensure that is ot
856     // give all edges positive orientation. Since vertices are created in Z-order, all ranks will agree about the
857     // ordering cone[0] < cone[1]. This is overkill and it would be nice to remove this preparation and make
858     // DMPlexCreateIsoperiodicClosureSF_Private() more resilient, so it fixes any inconsistent orientations. That might
859     // be needed in a general CGNS reader, for example.
860     PetscCall(DMPlexOrientPositiveEdges_Private(dm));
861 
862     DMLabel label;
863     PetscCall(DMCreateLabel(dm, "Face Sets"));
864     PetscCall(DMGetLabel(dm, "Face Sets", &label));
865     PetscSegBuffer per_faces[3], donor_face_closure[3], my_donor_faces[3];
866     for (PetscInt i = 0; i < 3; i++) {
867       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &per_faces[i]));
868       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &my_donor_faces[i]));
869       PetscCall(PetscSegBufferCreate(sizeof(ZCode), 64 * PetscPowInt(2, dim), &donor_face_closure[i]));
870     }
871     PetscInt fStart, fEnd, vStart, vEnd;
872     PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
873     PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
874     for (PetscInt f = fStart; f < fEnd; f++) {
875       PetscInt npoints, *points = NULL, num_fverts = 0, fverts[8];
876       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
877       PetscInt bc_count[6] = {0};
878       for (PetscInt i = 0; i < npoints; i++) {
879         PetscInt p = points[2 * i];
880         if (p < vStart || vEnd <= p) continue;
881         fverts[num_fverts++] = p;
882         Ijk loc              = ZCodeSplit(vert_z[p - vStart]);
883         // Convention here matches DMPlexCreateCubeMesh_Internal
884         bc_count[0] += loc.i == 0;
885         bc_count[1] += loc.i == layout.vextent.i - 1;
886         bc_count[2] += loc.j == 0;
887         bc_count[3] += loc.j == layout.vextent.j - 1;
888         bc_count[4] += loc.k == 0;
889         bc_count[5] += loc.k == layout.vextent.k - 1;
890       }
891       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
892       for (PetscInt bc = 0, bc_match = 0; bc < 2 * dim; bc++) {
893         if (bc_count[bc] == PetscPowInt(2, dim - 1)) {
894           PetscCall(DMPlexOrientPoint(dm, f, face_orient_dim[dim][bc]));
895           if (periodicity[bc / 2] == DM_BOUNDARY_PERIODIC) {
896             PetscInt *put;
897             if (bc % 2 == 0) { // donor face; no label
898               PetscCall(PetscSegBufferGet(my_donor_faces[bc / 2], 1, &put));
899               *put = f;
900             } else { // periodic face
901               PetscCall(PetscSegBufferGet(per_faces[bc / 2], 1, &put));
902               *put = f;
903               ZCode *zput;
904               PetscCall(PetscSegBufferGet(donor_face_closure[bc / 2], num_fverts, &zput));
905               for (PetscInt i = 0; i < num_fverts; i++) {
906                 Ijk loc = ZCodeSplit(vert_z[fverts[i] - vStart]);
907                 switch (bc / 2) {
908                 case 0:
909                   loc.i = 0;
910                   break;
911                 case 1:
912                   loc.j = 0;
913                   break;
914                 case 2:
915                   loc.k = 0;
916                   break;
917                 }
918                 *zput++ = ZEncode(loc);
919               }
920             }
921             continue;
922           }
923           PetscAssert(bc_match == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Face matches multiple face sets");
924           PetscCall(DMLabelSetValue(label, f, face_marker_dim[dim][bc]));
925           bc_match++;
926         }
927       }
928     }
929     // Ensure that the Coordinate DM has our new boundary labels
930     DM cdm;
931     PetscCall(DMGetCoordinateDM(dm, &cdm));
932     PetscCall(DMCopyLabels(dm, cdm, PETSC_COPY_VALUES, PETSC_FALSE, DM_COPY_LABELS_FAIL));
933     if (periodicity[0] == DM_BOUNDARY_PERIODIC || (dim > 1 && periodicity[1] == DM_BOUNDARY_PERIODIC) || (dim > 2 && periodicity[2] == DM_BOUNDARY_PERIODIC)) {
934       PetscCall(DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(dm, &layout, vert_z, per_faces, lower, upper, periodicity, donor_face_closure, my_donor_faces));
935     }
936     for (PetscInt i = 0; i < 3; i++) {
937       PetscCall(PetscSegBufferDestroy(&per_faces[i]));
938       PetscCall(PetscSegBufferDestroy(&donor_face_closure[i]));
939       PetscCall(PetscSegBufferDestroy(&my_donor_faces[i]));
940     }
941   }
942   PetscCall(PetscFree(layout.zstarts));
943   PetscCall(PetscFree(vert_z));
944   PetscFunctionReturn(PETSC_SUCCESS);
945 }
946 
947 /*@
948   DMPlexSetIsoperiodicFaceSF - Express periodicity from an existing mesh
949 
950   Logically Collective
951 
952   Input Parameters:
953 + dm           - The `DMPLEX` on which to set periodicity
954 . num_face_sfs - Number of `PetscSF`s in `face_sfs`
955 - face_sfs     - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.
956 
957   Level: advanced
958 
959   Note:
960   One can use `-dm_plex_shape zbox` to use this mode of periodicity, wherein the periodic points are distinct both globally
961   and locally, but are paired when creating a global dof space.
962 
963 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexGetIsoperiodicFaceSF()`
964 @*/
965 PetscErrorCode DMPlexSetIsoperiodicFaceSF(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs)
966 {
967   DM_Plex *plex = (DM_Plex *)dm->data;
968 
969   PetscFunctionBegin;
970   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
971   if (face_sfs) PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex));
972   if (face_sfs == plex->periodic.face_sfs && num_face_sfs == plex->periodic.num_face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
973 
974   for (PetscInt i = 0; i < num_face_sfs; i++) PetscCall(PetscObjectReference((PetscObject)face_sfs[i]));
975 
976   if (plex->periodic.num_face_sfs > 0) {
977     for (PetscInt i = 0; i < plex->periodic.num_face_sfs; i++) PetscCall(PetscSFDestroy(&plex->periodic.face_sfs[i]));
978     PetscCall(PetscFree(plex->periodic.face_sfs));
979   }
980 
981   plex->periodic.num_face_sfs = num_face_sfs;
982   PetscCall(PetscCalloc1(num_face_sfs, &plex->periodic.face_sfs));
983   for (PetscInt i = 0; i < num_face_sfs; i++) plex->periodic.face_sfs[i] = face_sfs[i];
984 
985   DM cdm = dm->coordinates[0].dm; // Can't DMGetCoordinateDM because it automatically creates one
986   if (cdm) {
987     PetscCall(DMPlexSetIsoperiodicFaceSF(cdm, num_face_sfs, face_sfs));
988     if (face_sfs) cdm->periodic.setup = DMPeriodicCoordinateSetUp_Internal;
989   }
990   PetscFunctionReturn(PETSC_SUCCESS);
991 }
992 
993 /*@C
994   DMPlexGetIsoperiodicFaceSF - Obtain periodicity for a mesh
995 
996   Logically Collective
997 
998   Input Parameter:
999 . dm - The `DMPLEX` for which to obtain periodic relation
1000 
1001   Output Parameters:
1002 + num_face_sfs - Number of `PetscSF`s in the array
1003 - face_sfs     - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.
1004 
1005   Level: advanced
1006 
1007 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()`
1008 @*/
1009 PetscErrorCode DMPlexGetIsoperiodicFaceSF(DM dm, PetscInt *num_face_sfs, const PetscSF **face_sfs)
1010 {
1011   DM_Plex *plex = (DM_Plex *)dm->data;
1012 
1013   PetscFunctionBegin;
1014   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1015   *face_sfs     = plex->periodic.face_sfs;
1016   *num_face_sfs = plex->periodic.num_face_sfs;
1017   PetscFunctionReturn(PETSC_SUCCESS);
1018 }
1019 
1020 /*@C
1021   DMPlexSetIsoperiodicFaceTransform - set geometric transform from donor to periodic points
1022 
1023   Logically Collective
1024 
1025   Input Parameters:
1026 + dm - `DMPLEX` that has been configured with `DMPlexSetIsoperiodicFaceSF()`
1027 . n  - Number of transforms in array
1028 - t  - Array of 4x4 affine transformation basis.
1029 
1030   Level: advanced
1031 
1032   Notes:
1033   Affine transforms are 4x4 matrices in which the leading 3x3 block expresses a rotation (or identity for no rotation),
1034   the last column contains a translation vector, and the bottom row is all zero except the last entry, which must always
1035   be 1. This representation is common in geometric modeling and allows affine transformations to be composed using
1036   simple matrix multiplication.
1037 
1038   Although the interface accepts a general affine transform, only affine translation is supported at present.
1039 
1040   Developer Notes:
1041   This interface should be replaced by making BasisTransform public, expanding it to support affine representations, and
1042   adding GPU implementations to apply the G2L/L2G transforms.
1043 
1044 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()`
1045 @*/
1046 PetscErrorCode DMPlexSetIsoperiodicFaceTransform(DM dm, PetscInt n, const PetscScalar *t)
1047 {
1048   DM_Plex *plex = (DM_Plex *)dm->data;
1049 
1050   PetscFunctionBegin;
1051   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1052   PetscCheck(n == plex->periodic.num_face_sfs, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Number of transforms (%" PetscInt_FMT ") must equal number of isoperiodc face SFs (%" PetscInt_FMT ")", n, plex->periodic.num_face_sfs);
1053 
1054   PetscCall(PetscMalloc1(n, &plex->periodic.transform));
1055   for (PetscInt i = 0; i < n; i++) {
1056     for (PetscInt j = 0; j < 4; j++) {
1057       for (PetscInt k = 0; k < 4; k++) {
1058         PetscCheck(j != k || t[i * 16 + j * 4 + k] == 1., PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Rotated transforms not supported");
1059         plex->periodic.transform[i][j][k] = t[i * 16 + j * 4 + k];
1060       }
1061     }
1062   }
1063   PetscFunctionReturn(PETSC_SUCCESS);
1064 }
1065