xref: /petsc/src/dm/impls/plex/plexsfc.c (revision 45221fc9ed767351a56a7f0ee9f36e389b52695a)
1 #include <petsc/private/dmpleximpl.h> /*I   "petscdmplex.h"   I*/
2 #include <petscsf.h>
3 #include <petsc/private/hashset.h>
4 
5 typedef uint64_t ZCode;
6 
7 PETSC_HASH_SET(ZSet, ZCode, PetscHash_UInt64, PetscHashEqual)
8 
9 typedef struct {
10   PetscInt i, j, k;
11 } Ijk;
12 
13 typedef struct {
14   Ijk         eextent;
15   Ijk         vextent;
16   PetscMPIInt comm_size;
17   ZCode      *zstarts;
18 } ZLayout;
19 
20 // ***** Overview of ZCode *******
21 // The SFC uses integer indexing for each dimension and encodes them into a single integer by interleaving the bits of each index.
22 // This is known as Morton encoding, and is refered to as ZCode in this code.
23 // So for index i having bits [i2,i1,i0], and similar for indexes j and k, the ZCode (Morton number) would be:
24 //    [k2,j2,i2,k1,j1,i1,k0,j0,i0]
25 // This encoding allows for easier traversal of the SFC structure (see https://en.wikipedia.org/wiki/Z-order_curve and `ZStepOct()`).
26 // `ZEncode()` is used to go from indices to ZCode, while `ZCodeSplit()` goes from ZCode back to indices.
27 
28 // Decodes the leading interleaved index from a ZCode
29 // e.g. [k2,j2,i2,k1,j1,i1,k0,j0,i0] -> [i2,i1,i0]
30 // Magic numbers taken from https://stackoverflow.com/a/18528775/7564988 (translated to octal)
31 static unsigned ZCodeSplit1(ZCode z)
32 {
33   z &= 0111111111111111111111;
34   z = (z | z >> 2) & 0103030303030303030303;
35   z = (z | z >> 4) & 0100170017001700170017;
36   z = (z | z >> 8) & 0370000037700000377;
37   z = (z | z >> 16) & 0370000000000177777;
38   z = (z | z >> 32) & 07777777;
39   return (unsigned)z;
40 }
41 
42 // Encodes the leading interleaved index from a ZCode
43 // e.g. [i2,i1,i0] -> [0,0,i2,0,0,i1,0,0,i0]
44 static ZCode ZEncode1(unsigned t)
45 {
46   ZCode z = t;
47   z &= 07777777;
48   z = (z | z << 32) & 0370000000000177777;
49   z = (z | z << 16) & 0370000037700000377;
50   z = (z | z << 8) & 0100170017001700170017;
51   z = (z | z << 4) & 0103030303030303030303;
52   z = (z | z << 2) & 0111111111111111111111;
53   return z;
54 }
55 
56 // Decodes i j k indices from a ZCode.
57 // Uses `ZCodeSplit1()` by shifting ZCode so that the leading index is the desired one to decode
58 static Ijk ZCodeSplit(ZCode z)
59 {
60   Ijk c;
61   c.i = ZCodeSplit1(z >> 2);
62   c.j = ZCodeSplit1(z >> 1);
63   c.k = ZCodeSplit1(z >> 0);
64   return c;
65 }
66 
67 // Encodes i j k indices to a ZCode.
68 // Uses `ZCodeEncode1()` by shifting resulting ZCode to the appropriate bit placement
69 static ZCode ZEncode(Ijk c)
70 {
71   ZCode z = (ZEncode1((unsigned int)c.i) << 2) | (ZEncode1((unsigned int)c.j) << 1) | ZEncode1((unsigned int)c.k);
72   return z;
73 }
74 
75 static PetscBool IjkActive(Ijk extent, Ijk l)
76 {
77   if (l.i < extent.i && l.j < extent.j && l.k < extent.k) return PETSC_TRUE;
78   return PETSC_FALSE;
79 }
80 
81 // If z is not the base of an octet (last 3 bits 0), return 0.
82 //
83 // If z is the base of an octet, we recursively grow to the biggest structured octet. This is typically useful when a z
84 // is outside the domain and we wish to skip a (possibly recursively large) octet to find our next interesting point.
85 static ZCode ZStepOct(ZCode z)
86 {
87   if (PetscUnlikely(z == 0)) return 0; // Infinite loop below if z == 0
88   ZCode step = 07;
89   for (; (z & step) == 0; step = (step << 3) | 07) { }
90   return step >> 3;
91 }
92 
93 // Since element/vertex box extents are typically not equal powers of 2, Z codes that lie within the domain are not contiguous.
94 static PetscErrorCode ZLayoutCreate(PetscMPIInt size, const PetscInt eextent[3], const PetscInt vextent[3], ZLayout *layout)
95 {
96   PetscFunctionBegin;
97   layout->eextent.i = eextent[0];
98   layout->eextent.j = eextent[1];
99   layout->eextent.k = eextent[2];
100   layout->vextent.i = vextent[0];
101   layout->vextent.j = vextent[1];
102   layout->vextent.k = vextent[2];
103   layout->comm_size = size;
104   layout->zstarts   = NULL;
105   PetscCall(PetscMalloc1(size + 1, &layout->zstarts));
106 
107   PetscInt total_elems = eextent[0] * eextent[1] * eextent[2];
108   ZCode    z           = 0;
109   layout->zstarts[0]   = 0;
110   // This loop traverses all vertices in the global domain, so is worth making fast. We use ZStepBound
111   for (PetscMPIInt r = 0; r < size; r++) {
112     PetscInt elems_needed = (total_elems / size) + (total_elems % size > r), count;
113     for (count = 0; count < elems_needed; z++) {
114       ZCode skip = ZStepOct(z); // optimistically attempt a longer step
115       for (ZCode s = skip;; s >>= 3) {
116         Ijk trial = ZCodeSplit(z + s);
117         if (IjkActive(layout->eextent, trial)) {
118           while (count + s + 1 > (ZCode)elems_needed) s >>= 3; // Shrink the octet
119           count += s + 1;
120           z += s;
121           break;
122         }
123         if (s == 0) { // the whole skip octet is inactive
124           z += skip;
125           break;
126         }
127       }
128     }
129     // Pick up any extra vertices in the Z ordering before the next rank's first owned element.
130     //
131     // This leads to poorly balanced vertices when eextent is a power of 2, since all the fringe vertices end up
132     // on the last rank. A possible solution is to balance the Z-order vertices independently from the cells, which will
133     // result in a lot of element closures being remote. We could finish marking boundary conditions, then do a round of
134     // vertex ownership smoothing (which would reorder and redistribute vertices without touching element distribution).
135     // Another would be to have an analytic ownership criteria for vertices in the fringe veextent - eextent. This would
136     // complicate the job of identifying an owner and its offset.
137     //
138     // The current recommended approach is to let `-dm_distribute 1` (default) resolve vertex ownership. This is
139     // *mandatory* with isoperiodicity (except in special cases) to remove standed vertices from local spaces. Here's
140     // the issue:
141     //
142     // Consider this partition on rank 0 (left) and rank 1.
143     //
144     //    4 --------  5 -- 14 --10 -- 21 --11
145     //                |          |          |
146     // 7 -- 16 --  8  |          |          |
147     // |           |  3 -------  7 -------  9
148     // |           |             |          |
149     // 4 --------  6 ------ 10   |          |
150     // |           |         |   6 -- 16 -- 8
151     // |           |         |
152     // 3 ---11---  5 --18--  9
153     //
154     // The periodic face SF looks like
155     // [0] Number of roots=21, leaves=1, remote ranks=1
156     // [0] 16 <- (0,11)
157     // [1] Number of roots=22, leaves=2, remote ranks=2
158     // [1] 14 <- (0,18)
159     // [1] 21 <- (1,16)
160     //
161     // In handling face (0,16), rank 0 learns that (0,7) and (0,8) map to (0,3) and (0,5) respectively, thus we won't use
162     // the point SF links to (1,4) and (1,5). Rank 1 learns about the periodic mapping of (1,5) while handling face
163     // (1,14), but never learns that vertex (1,4) has been mapped to (0,3) by face (0,16).
164     //
165     // We can relatively easily inform vertex (1,4) of this mapping, but it stays in rank 1's local space despite not
166     // being in the closure and thus not being contributed to. This would be mostly harmless except that some viewer
167     // routines expect all local points to be somehow significant. It is not easy to analytically remove the (1,4)
168     // vertex because the point SF and isoperiodic face SF would need to be updated to account for removal of the
169     // stranded vertices.
170     for (; z <= ZEncode(layout->vextent); z++) {
171       Ijk loc = ZCodeSplit(z);
172       if (IjkActive(layout->eextent, loc)) break;
173       z += ZStepOct(z);
174     }
175     layout->zstarts[r + 1] = z;
176   }
177   layout->zstarts[size] = ZEncode(layout->vextent);
178   PetscFunctionReturn(PETSC_SUCCESS);
179 }
180 
181 static PetscInt ZLayoutElementsOnRank(const ZLayout *layout, PetscMPIInt rank)
182 {
183   PetscInt remote_elem = 0;
184   for (ZCode rz = layout->zstarts[rank]; rz < layout->zstarts[rank + 1]; rz++) {
185     Ijk loc = ZCodeSplit(rz);
186     if (IjkActive(layout->eextent, loc)) remote_elem++;
187     else rz += ZStepOct(rz);
188   }
189   return remote_elem;
190 }
191 
192 static PetscInt ZCodeFind(ZCode key, PetscInt n, const ZCode X[])
193 {
194   PetscInt lo = 0, hi = n;
195 
196   if (n == 0) return -1;
197   while (hi - lo > 1) {
198     PetscInt mid = lo + (hi - lo) / 2;
199     if (key < X[mid]) hi = mid;
200     else lo = mid;
201   }
202   return key == X[lo] ? lo : -(lo + (key > X[lo]) + 1);
203 }
204 
205 static inline PetscBool IsPointInsideStratum(PetscInt point, PetscInt pStart, PetscInt pEnd)
206 {
207   return (point >= pStart && point < pEnd) ? PETSC_TRUE : PETSC_FALSE;
208 }
209 
210 static PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(DM dm, const ZLayout *layout, const ZCode *vert_z, PetscSegBuffer per_faces[3], const PetscReal *lower, const PetscReal *upper, const DMBoundaryType *periodicity, PetscSegBuffer donor_face_closure[3], PetscSegBuffer my_donor_faces[3])
211 {
212   MPI_Comm    comm;
213   PetscInt    dim, vStart, vEnd;
214   PetscMPIInt size;
215   PetscSF     face_sfs[3];
216   PetscScalar transforms[3][4][4] = {{{0}}};
217 
218   PetscFunctionBegin;
219   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
220   PetscCallMPI(MPI_Comm_size(comm, &size));
221   PetscCall(DMGetDimension(dm, &dim));
222   const PetscInt csize = PetscPowInt(2, dim - 1);
223   PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
224 
225   PetscInt num_directions = 0;
226   for (PetscInt direction = 0; direction < dim; direction++) {
227     PetscCount   num_faces;
228     PetscInt    *faces;
229     ZCode       *donor_verts, *donor_minz;
230     PetscSFNode *leaf;
231     PetscCount   num_multiroots = 0;
232     PetscInt     pStart, pEnd;
233     PetscBool    sorted;
234     PetscInt     inum_faces;
235 
236     if (periodicity[direction] != DM_BOUNDARY_PERIODIC) continue;
237     PetscCall(PetscSegBufferGetSize(per_faces[direction], &num_faces));
238     PetscCall(PetscSegBufferExtractInPlace(per_faces[direction], &faces));
239     PetscCall(PetscSegBufferExtractInPlace(donor_face_closure[direction], &donor_verts));
240     PetscCall(PetscMalloc1(num_faces, &donor_minz));
241     PetscCall(PetscMalloc1(num_faces, &leaf));
242     for (PetscCount i = 0; i < num_faces; i++) {
243       ZCode minz = donor_verts[i * csize];
244 
245       for (PetscInt j = 1; j < csize; j++) minz = PetscMin(minz, donor_verts[i * csize + j]);
246       donor_minz[i] = minz;
247     }
248     PetscCall(PetscIntCast(num_faces, &inum_faces));
249     PetscCall(PetscSortedInt64(inum_faces, (const PetscInt64 *)donor_minz, &sorted));
250     // If a donor vertex were chosen to broker multiple faces, we would have a logic error.
251     // Checking for sorting is a cheap check that there are no duplicates.
252     PetscCheck(sorted, PETSC_COMM_SELF, PETSC_ERR_PLIB, "minz not sorted; possible duplicates not checked");
253     for (PetscCount i = 0; i < num_faces;) {
254       ZCode       z = donor_minz[i];
255       PetscMPIInt remote_rank, remote_count = 0;
256 
257       PetscCall(PetscMPIIntCast(ZCodeFind(z, size + 1, layout->zstarts), &remote_rank));
258       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
259       // Process all the vertices on this rank
260       for (ZCode rz = layout->zstarts[remote_rank]; rz < layout->zstarts[remote_rank + 1]; rz++) {
261         Ijk loc = ZCodeSplit(rz);
262 
263         if (rz == z) {
264           leaf[i].rank  = remote_rank;
265           leaf[i].index = remote_count;
266           i++;
267           if (i == num_faces) break;
268           z = donor_minz[i];
269         }
270         if (IjkActive(layout->vextent, loc)) remote_count++;
271       }
272     }
273     PetscCall(PetscFree(donor_minz));
274     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &face_sfs[num_directions]));
275     PetscCall(PetscSFSetGraph(face_sfs[num_directions], vEnd - vStart, inum_faces, NULL, PETSC_USE_POINTER, leaf, PETSC_USE_POINTER));
276     const PetscInt *my_donor_degree;
277     PetscCall(PetscSFComputeDegreeBegin(face_sfs[num_directions], &my_donor_degree));
278     PetscCall(PetscSFComputeDegreeEnd(face_sfs[num_directions], &my_donor_degree));
279 
280     for (PetscInt i = 0; i < vEnd - vStart; i++) {
281       num_multiroots += my_donor_degree[i];
282       if (my_donor_degree[i] == 0) continue;
283       PetscAssert(my_donor_degree[i] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex has multiple faces");
284     }
285     PetscInt  *my_donors, *donor_indices, *my_donor_indices;
286     PetscCount num_my_donors;
287 
288     PetscCall(PetscSegBufferGetSize(my_donor_faces[direction], &num_my_donors));
289     PetscCheck(num_my_donors == num_multiroots, PETSC_COMM_SELF, PETSC_ERR_SUP, "Donor request (%" PetscCount_FMT ") does not match expected donors (%" PetscCount_FMT ")", num_my_donors, num_multiroots);
290     PetscCall(PetscSegBufferExtractInPlace(my_donor_faces[direction], &my_donors));
291     PetscCall(PetscMalloc1(vEnd - vStart, &my_donor_indices));
292     for (PetscCount i = 0; i < num_my_donors; i++) {
293       PetscInt f = my_donors[i];
294       PetscInt num_points, *points = NULL, minv = PETSC_INT_MAX;
295 
296       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
297       for (PetscInt j = 0; j < num_points; j++) {
298         PetscInt p = points[2 * j];
299         if (!IsPointInsideStratum(p, vStart, vEnd)) continue;
300         minv = PetscMin(minv, p);
301       }
302       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
303       PetscAssert(my_donor_degree[minv - vStart] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex not requested");
304       my_donor_indices[minv - vStart] = f;
305     }
306     PetscCall(PetscMalloc1(num_faces, &donor_indices));
307     PetscCall(PetscSFBcastBegin(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
308     PetscCall(PetscSFBcastEnd(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
309     PetscCall(PetscFree(my_donor_indices));
310     // Modify our leafs so they point to donor faces instead of donor minz. Additionally, give them indices as faces.
311     for (PetscCount i = 0; i < num_faces; i++) leaf[i].index = donor_indices[i];
312     PetscCall(PetscFree(donor_indices));
313     PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
314     PetscCall(PetscSFSetGraph(face_sfs[num_directions], pEnd - pStart, inum_faces, faces, PETSC_COPY_VALUES, leaf, PETSC_OWN_POINTER));
315     {
316       char face_sf_name[PETSC_MAX_PATH_LEN];
317       PetscCall(PetscSNPrintf(face_sf_name, sizeof face_sf_name, "Z-order Isoperiodic Faces #%" PetscInt_FMT, num_directions));
318       PetscCall(PetscObjectSetName((PetscObject)face_sfs[num_directions], face_sf_name));
319     }
320 
321     transforms[num_directions][0][0]         = 1;
322     transforms[num_directions][1][1]         = 1;
323     transforms[num_directions][2][2]         = 1;
324     transforms[num_directions][3][3]         = 1;
325     transforms[num_directions][direction][3] = upper[direction] - lower[direction];
326     num_directions++;
327   }
328 
329   PetscCall(DMPlexSetIsoperiodicFaceSF(dm, num_directions, face_sfs));
330   PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, num_directions, (PetscScalar *)transforms));
331 
332   for (PetscInt i = 0; i < num_directions; i++) PetscCall(PetscSFDestroy(&face_sfs[i]));
333   PetscFunctionReturn(PETSC_SUCCESS);
334 }
335 
336 // This is a DMGlobalToLocalHook that applies the affine offsets. When extended for rotated periodicity, it'll need to
337 // apply a rotatonal transform and similar operations will be needed for fields (e.g., to rotate a velocity vector).
338 // We use this crude approach here so we don't have to write new GPU kernels yet.
339 static PetscErrorCode DMCoordAddPeriodicOffsets_Private(DM dm, Vec g, InsertMode mode, Vec l, void *ctx)
340 {
341   PetscFunctionBegin;
342   // These `VecScatter`s should be merged to improve efficiency; the scatters cannot be overlapped.
343   for (PetscInt i = 0; i < dm->periodic.num_affines; i++) {
344     PetscCall(VecScatterBegin(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD));
345     PetscCall(VecScatterEnd(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD));
346   }
347   PetscFunctionReturn(PETSC_SUCCESS);
348 }
349 
350 // Modify Vec based on the transformation of `point` for the given section and field
351 static PetscErrorCode DMPlexOrientFieldPointVec(DM dm, PetscSection section, PetscInt field, Vec V, PetscInt point, PetscInt orientation)
352 {
353   PetscScalar        *copy, *V_arr;
354   PetscInt            dof, off, point_ornt[2] = {point, orientation};
355   const PetscInt    **perms;
356   const PetscScalar **rots;
357 
358   PetscFunctionBeginUser;
359   PetscCall(PetscSectionGetDof(section, point, &dof));
360   PetscCall(PetscSectionGetOffset(section, point, &off));
361   PetscCall(VecGetArray(V, &V_arr));
362   PetscCall(DMGetWorkArray(dm, dof, MPIU_SCALAR, &copy));
363   PetscArraycpy(copy, &V_arr[off], dof);
364 
365   PetscCall(PetscSectionGetFieldPointSyms(section, field, 1, point_ornt, &perms, &rots));
366   for (PetscInt i = 0; i < dof; i++) {
367     if (perms[0]) V_arr[off + perms[0][i]] = copy[i];
368     if (rots[0]) V_arr[off + perms[0][i]] *= rots[0][i];
369   }
370 
371   PetscCall(PetscSectionRestoreFieldPointSyms(section, field, 1, point_ornt, &perms, &rots));
372   PetscCall(DMRestoreWorkArray(dm, dof, MPIU_SCALAR, &copy));
373   PetscCall(VecRestoreArray(V, &V_arr));
374   PetscFunctionReturn(PETSC_SUCCESS);
375 }
376 
377 // Reorient the point in the DMPlex while also applying necessary corrections to other structures (e.g. coordinates)
378 static PetscErrorCode DMPlexOrientPointWithCorrections(DM dm, PetscInt point, PetscInt ornt)
379 {
380   // TODO: Potential speed up if we early exit for ornt == 0 (i.e. if ornt is identity, we don't need to do anything)
381   PetscFunctionBeginUser;
382   PetscCall(DMPlexOrientPoint(dm, point, ornt));
383 
384   { // Correct coordinates based on new cone ordering
385     DM           cdm;
386     PetscSection csection;
387     Vec          coordinates;
388     PetscInt     pStart, pEnd;
389 
390     PetscCall(DMGetCoordinatesLocal(dm, &coordinates));
391     PetscCall(DMGetCoordinateDM(dm, &cdm));
392     PetscCall(DMGetLocalSection(cdm, &csection));
393     PetscCall(PetscSectionGetChart(csection, &pStart, &pEnd));
394     if (IsPointInsideStratum(point, pStart, pEnd)) PetscCall(DMPlexOrientFieldPointVec(cdm, csection, 0, coordinates, point, ornt));
395   }
396   // TODO: Correct sfNatural
397   PetscFunctionReturn(PETSC_SUCCESS);
398 }
399 
400 // Creates SF to communicate data from donor to periodic faces. The data can be different sizes per donor-periodic pair and is given in `point_sizes[]`
401 static PetscErrorCode CreateDonorToPeriodicSF(DM dm, PetscSF face_sf, PetscInt pStart, PetscInt pEnd, const PetscInt point_sizes[], PetscInt *rootbuffersize, PetscInt *leafbuffersize, PetscBT *rootbt, PetscSF *sf_closure)
402 {
403   MPI_Comm           comm;
404   PetscMPIInt        rank;
405   PetscInt           nroots, nleaves;
406   PetscInt          *rootdata, *leafdata;
407   const PetscInt    *filocal;
408   const PetscSFNode *firemote;
409 
410   PetscFunctionBeginUser;
411   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
412   PetscCallMPI(MPI_Comm_rank(comm, &rank));
413 
414   PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, &firemote));
415   PetscCall(PetscCalloc2(2 * nroots, &rootdata, 2 * nroots, &leafdata));
416   for (PetscInt i = 0; i < nleaves; i++) {
417     PetscInt point = filocal[i];
418     PetscCheck(IsPointInsideStratum(point, pStart, pEnd), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Point %" PetscInt_FMT " in leaves exists outside of stratum [%" PetscInt_FMT ", %" PetscInt_FMT ")", point, pStart, pEnd);
419     leafdata[point] = point_sizes[point - pStart];
420   }
421   PetscCall(PetscSFReduceBegin(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
422   PetscCall(PetscSFReduceEnd(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
423 
424   PetscInt root_offset = 0;
425   PetscCall(PetscBTCreate(nroots, rootbt));
426   for (PetscInt p = 0; p < nroots; p++) {
427     const PetscInt *donor_dof = rootdata + nroots;
428     if (donor_dof[p] == 0) {
429       rootdata[2 * p]     = -1;
430       rootdata[2 * p + 1] = -1;
431       continue;
432     }
433     PetscCall(PetscBTSet(*rootbt, p));
434     PetscCheck(IsPointInsideStratum(p, pStart, pEnd), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Point %" PetscInt_FMT " in roots exists outside of stratum [%" PetscInt_FMT ", %" PetscInt_FMT ")", p, pStart, pEnd);
435     PetscInt p_size = point_sizes[p - pStart];
436     PetscCheck(donor_dof[p] == p_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Reduced leaf data size (%" PetscInt_FMT ") does not match root data size (%" PetscInt_FMT ")", donor_dof[p], p_size);
437     rootdata[2 * p]     = root_offset;
438     rootdata[2 * p + 1] = p_size;
439     root_offset += p_size;
440   }
441   PetscCall(PetscSFBcastBegin(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
442   PetscCall(PetscSFBcastEnd(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
443   // Count how many leaves we need to communicate the closures
444   PetscInt leaf_offset = 0;
445   for (PetscInt i = 0; i < nleaves; i++) {
446     PetscInt point = filocal[i];
447     if (leafdata[2 * point + 1] < 0) continue;
448     leaf_offset += leafdata[2 * point + 1];
449   }
450 
451   PetscSFNode *closure_leaf;
452   PetscCall(PetscMalloc1(leaf_offset, &closure_leaf));
453   leaf_offset = 0;
454   for (PetscInt i = 0; i < nleaves; i++) {
455     PetscInt point   = filocal[i];
456     PetscInt cl_size = leafdata[2 * point + 1];
457     if (cl_size < 0) continue;
458     for (PetscInt j = 0; j < cl_size; j++) {
459       closure_leaf[leaf_offset].rank  = firemote[i].rank;
460       closure_leaf[leaf_offset].index = leafdata[2 * point] + j;
461       leaf_offset++;
462     }
463   }
464 
465   PetscCall(PetscSFCreate(comm, sf_closure));
466   PetscCall(PetscSFSetGraph(*sf_closure, root_offset, leaf_offset, NULL, PETSC_USE_POINTER, closure_leaf, PETSC_OWN_POINTER));
467   *rootbuffersize = root_offset;
468   *leafbuffersize = leaf_offset;
469   PetscCall(PetscFree2(rootdata, leafdata));
470   PetscFunctionReturn(PETSC_SUCCESS);
471 }
472 
473 // Determine if `key` is in `array`. `array` does NOT need to be sorted
474 static inline PetscBool SearchIntArray(PetscInt key, PetscInt array_size, const PetscInt array[])
475 {
476   for (PetscInt i = 0; i < array_size; i++)
477     if (array[i] == key) return PETSC_TRUE;
478   return PETSC_FALSE;
479 }
480 
481 // Translate a cone in periodic points to the cone in donor points based on the `periodic2donor` array
482 static inline PetscErrorCode TranslateConeP2D(const PetscInt periodic_cone[], PetscInt cone_size, const PetscInt periodic2donor[], PetscInt p2d_count, PetscInt p2d_cone[])
483 {
484   PetscFunctionBeginUser;
485   for (PetscInt p = 0; p < cone_size; p++) {
486     PetscInt p2d_index = -1;
487     for (PetscInt p2d = 0; p2d < p2d_count; p2d++) {
488       if (periodic2donor[p2d * 2] == periodic_cone[p]) p2d_index = p2d;
489     }
490     PetscCheck(p2d_index >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find periodic point in periodic-to-donor array");
491     p2d_cone[p] = periodic2donor[2 * p2d_index + 1];
492   }
493   PetscFunctionReturn(PETSC_SUCCESS);
494 }
495 
496 // Corrects the cone order of periodic faces (and their transitive closure's cones) to match their donor face pair.
497 //
498 // This is done by:
499 // 1. Communicating the donor's vertex coordinates and recursive cones (i.e. its own cone and those of it's constituent edges) to it's periodic pairs
500 //    - The donor vertices have the isoperiodic transform applied to them such that they should match exactly
501 // 2. Translating the periodic vertices into the donor vertices point IDs
502 // 3. Translating the cone of each periodic point into the donor point IDs
503 // 4. Comparing the periodic-to-donor cone to the donor cone for each point
504 // 5. Apply the necessary transformation to the periodic cone to make it match the donor cone
505 static PetscErrorCode DMPlexCorrectOrientationForIsoperiodic(DM dm)
506 {
507   MPI_Comm        comm;
508   DM_Plex        *plex = (DM_Plex *)dm->data;
509   PetscInt        nroots, nleaves;
510   const PetscInt *filocal;
511   DM              cdm;
512   PetscSection    csection;
513   Vec             coordinates;
514   PetscInt        coords_field_id = 0;
515   PetscBool       debug_printing  = PETSC_FALSE;
516 
517   PetscFunctionBeginUser;
518   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
519   PetscCall(DMGetCoordinatesLocal(dm, &coordinates));
520   PetscCheck(coordinates, comm, PETSC_ERR_ARG_WRONGSTATE, "DM must have coordinates to setup isoperiodic");
521   PetscCall(DMGetCoordinateDM(dm, &cdm));
522   PetscCall(DMGetLocalSection(cdm, &csection));
523 
524   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) {
525     PetscSF face_sf                  = plex->periodic.face_sfs[f];
526     const PetscScalar(*transform)[4] = (const PetscScalar(*)[4])plex->periodic.transform[f];
527     PetscInt *face_vertices_size, *face_cones_size;
528     PetscInt  fStart, fEnd, vStart, vEnd, rootnumvert, leafnumvert, rootconesize, leafconesize, dim;
529     PetscSF   sf_vert_coords, sf_face_cones;
530     PetscBT   rootbt;
531 
532     PetscCall(DMGetCoordinateDim(dm, &dim));
533     PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
534     PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
535     PetscCall(PetscCalloc2(fEnd - fStart, &face_vertices_size, fEnd - fStart, &face_cones_size));
536 
537     // Create SFs to communicate donor vertices and donor cones to periodic faces
538     for (PetscInt f = fStart, index = 0; f < fEnd; f++, index++) {
539       PetscInt cl_size, *closure = NULL, num_vertices = 0;
540       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &cl_size, &closure));
541       for (PetscInt p = 0; p < cl_size; p++) {
542         PetscInt cl_point = closure[2 * p];
543         if (IsPointInsideStratum(cl_point, vStart, vEnd)) num_vertices++;
544         else {
545           PetscInt cone_size;
546           PetscCall(DMPlexGetConeSize(dm, cl_point, &cone_size));
547           face_cones_size[index] += cone_size + 2;
548         }
549       }
550       face_vertices_size[index] = num_vertices;
551       face_cones_size[index] += num_vertices;
552       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &cl_size, &closure));
553     }
554     PetscCall(CreateDonorToPeriodicSF(dm, face_sf, fStart, fEnd, face_vertices_size, &rootnumvert, &leafnumvert, &rootbt, &sf_vert_coords));
555     PetscCall(PetscBTDestroy(&rootbt));
556     PetscCall(CreateDonorToPeriodicSF(dm, face_sf, fStart, fEnd, face_cones_size, &rootconesize, &leafconesize, &rootbt, &sf_face_cones));
557 
558     PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, NULL));
559 
560     PetscReal *leaf_donor_coords;
561     PetscInt  *leaf_donor_cones;
562 
563     { // Communicate donor coords and cones to the periodic faces
564       PetscReal         *mydonor_vertices;
565       PetscInt          *mydonor_cones;
566       const PetscScalar *coords_arr;
567 
568       PetscCall(PetscCalloc2(rootnumvert * dim, &mydonor_vertices, rootconesize, &mydonor_cones));
569       PetscCall(VecGetArrayRead(coordinates, &coords_arr));
570       for (PetscInt donor_face = 0, donor_vert_offset = 0, donor_cone_offset = 0; donor_face < nroots; donor_face++) {
571         if (!PetscBTLookup(rootbt, donor_face)) continue;
572         PetscInt cl_size, *closure = NULL;
573 
574         PetscCall(DMPlexGetTransitiveClosure(dm, donor_face, PETSC_TRUE, &cl_size, &closure));
575         // Pack vertex coordinates
576         for (PetscInt p = 0; p < cl_size; p++) {
577           PetscInt cl_point = closure[2 * p], dof, offset;
578           if (!IsPointInsideStratum(cl_point, vStart, vEnd)) continue;
579           mydonor_cones[donor_cone_offset++] = cl_point;
580           PetscCall(PetscSectionGetFieldDof(csection, cl_point, coords_field_id, &dof));
581           PetscCall(PetscSectionGetFieldOffset(csection, cl_point, coords_field_id, &offset));
582           PetscAssert(dof == dim, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Point %" PetscInt_FMT " has dof size %" PetscInt_FMT ", but should match dimension size %" PetscInt_FMT, cl_point, dof, dim);
583           // Apply isoperiodic transform to donor vertices such that corresponding periodic vertices should match exactly
584           for (PetscInt d = 0; d < dof; d++) mydonor_vertices[donor_vert_offset * dim + d] = PetscRealPart(coords_arr[offset + d]) + PetscRealPart(transform[d][3]);
585           donor_vert_offset++;
586         }
587         // Pack cones of face points (including face itself)
588         for (PetscInt p = 0; p < cl_size; p++) {
589           PetscInt        cl_point = closure[2 * p], cone_size, depth;
590           const PetscInt *cone;
591 
592           PetscCall(DMPlexGetConeSize(dm, cl_point, &cone_size));
593           PetscCall(DMPlexGetCone(dm, cl_point, &cone));
594           PetscCall(DMPlexGetPointDepth(dm, cl_point, &depth));
595           if (depth == 0) continue; // don't include vertex depth
596           mydonor_cones[donor_cone_offset++] = cone_size;
597           mydonor_cones[donor_cone_offset++] = cl_point;
598           PetscArraycpy(&mydonor_cones[donor_cone_offset], cone, cone_size);
599           donor_cone_offset += cone_size;
600         }
601         PetscCall(DMPlexRestoreTransitiveClosure(dm, donor_face, PETSC_TRUE, &cl_size, &closure));
602       }
603       PetscCall(VecRestoreArrayRead(coordinates, &coords_arr));
604       PetscCall(PetscBTDestroy(&rootbt));
605 
606       MPI_Datatype vertex_unit;
607       PetscCallMPI(MPI_Type_contiguous(dim, MPIU_REAL, &vertex_unit));
608       PetscCallMPI(MPI_Type_commit(&vertex_unit));
609       PetscCall(PetscMalloc2(leafnumvert * 3, &leaf_donor_coords, leafconesize, &leaf_donor_cones));
610       PetscCall(PetscSFBcastBegin(sf_vert_coords, vertex_unit, mydonor_vertices, leaf_donor_coords, MPI_REPLACE));
611       PetscCall(PetscSFBcastBegin(sf_face_cones, MPIU_INT, mydonor_cones, leaf_donor_cones, MPI_REPLACE));
612       PetscCall(PetscSFBcastEnd(sf_vert_coords, vertex_unit, mydonor_vertices, leaf_donor_coords, MPI_REPLACE));
613       PetscCall(PetscSFBcastEnd(sf_face_cones, MPIU_INT, mydonor_cones, leaf_donor_cones, MPI_REPLACE));
614       PetscCall(PetscSFDestroy(&sf_vert_coords));
615       PetscCall(PetscSFDestroy(&sf_face_cones));
616       PetscCallMPI(MPI_Type_free(&vertex_unit));
617       PetscCall(PetscFree2(mydonor_vertices, mydonor_cones));
618     }
619 
620     { // Determine periodic orientation w/rt donor vertices and reorient
621       PetscReal tol = PetscSqr(PETSC_MACHINE_EPSILON * 1e3);
622       PetscInt *periodic2donor, dm_depth, maxConeSize;
623       PetscInt  coords_offset = 0, cones_offset = 0;
624 
625       PetscCall(DMPlexGetDepth(dm, &dm_depth));
626       PetscCall(DMPlexGetMaxSizes(dm, &maxConeSize, NULL));
627       PetscCall(DMGetWorkArray(dm, 2 * PetscPowInt(maxConeSize, dm_depth - 1), MPIU_INT, &periodic2donor));
628 
629       // Translate the periodic face vertices into the donor vertices
630       // Translation stored in periodic2donor
631       for (PetscInt i = 0; i < nleaves; i++) {
632         PetscInt  periodic_face = filocal[i], cl_size, num_verts = face_vertices_size[periodic_face - fStart];
633         PetscInt  cones_size = face_cones_size[periodic_face - fStart], p2d_count = 0;
634         PetscInt *closure = NULL;
635 
636         PetscCall(DMPlexGetTransitiveClosure(dm, periodic_face, PETSC_TRUE, &cl_size, &closure));
637         for (PetscInt p = 0; p < cl_size; p++) {
638           PetscInt     cl_point = closure[2 * p], coords_size, donor_vertex = -1;
639           PetscScalar *coords = NULL;
640 
641           if (!IsPointInsideStratum(cl_point, vStart, vEnd)) continue;
642           PetscCall(DMPlexVecGetClosure(dm, csection, coordinates, cl_point, &coords_size, &coords));
643           PetscAssert(coords_size == dim, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Point %" PetscInt_FMT " has dof size %" PetscInt_FMT ", but should match dimension size %" PetscInt_FMT, cl_point, coords_size, dim);
644 
645           for (PetscInt v = 0; v < num_verts; v++) {
646             PetscReal dist_sqr = 0;
647             for (PetscInt d = 0; d < coords_size; d++) dist_sqr += PetscSqr(PetscRealPart(coords[d]) - leaf_donor_coords[(v + coords_offset) * dim + d]);
648             if (dist_sqr < tol) {
649               donor_vertex = leaf_donor_cones[cones_offset + v];
650               break;
651             }
652           }
653           PetscCheck(donor_vertex >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Periodic face %" PetscInt_FMT " could not find matching donor vertex for vertex %" PetscInt_FMT, periodic_face, cl_point);
654           if (PetscDefined(USE_DEBUG)) {
655             for (PetscInt c = 0; c < p2d_count; c++) PetscCheck(periodic2donor[2 * c + 1] != donor_vertex, comm, PETSC_ERR_PLIB, "Found repeated cone_point in periodic_ordering");
656           }
657 
658           periodic2donor[2 * p2d_count + 0] = cl_point;
659           periodic2donor[2 * p2d_count + 1] = donor_vertex;
660           p2d_count++;
661           PetscCall(DMPlexVecRestoreClosure(dm, csection, coordinates, cl_point, &coords_size, &coords));
662         }
663         coords_offset += num_verts;
664         PetscCall(DMPlexRestoreTransitiveClosure(dm, periodic_face, PETSC_TRUE, &cl_size, &closure));
665 
666         { // Determine periodic orientation w/rt donor vertices and reorient
667           PetscInt      depth, *p2d_cone, face_is_array[1] = {periodic_face};
668           IS           *is_arrays, face_is;
669           PetscSection *section_arrays;
670           PetscInt     *donor_cone_array = &leaf_donor_cones[cones_offset + num_verts];
671 
672           PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 1, face_is_array, PETSC_USE_POINTER, &face_is));
673           PetscCall(DMPlexGetConeRecursive(dm, face_is, &depth, &is_arrays, &section_arrays));
674           PetscCall(ISDestroy(&face_is));
675           PetscCall(DMGetWorkArray(dm, maxConeSize, MPIU_INT, &p2d_cone));
676           for (PetscInt d = 0; d < depth - 1; d++) {
677             PetscInt        pStart, pEnd;
678             PetscSection    section = section_arrays[d];
679             const PetscInt *periodic_cone_arrays, *periodic_point_arrays;
680 
681             PetscCall(ISGetIndices(is_arrays[d], &periodic_cone_arrays));
682             PetscCall(ISGetIndices(is_arrays[d + 1], &periodic_point_arrays)); // Points at d+1 correspond to the cones at d
683             PetscCall(PetscSectionGetChart(section_arrays[d], &pStart, &pEnd));
684             for (PetscInt p = pStart; p < pEnd; p++) {
685               PetscInt periodic_cone_size, periodic_cone_offset, periodic_point = periodic_point_arrays[p];
686 
687               PetscCall(PetscSectionGetDof(section, p, &periodic_cone_size));
688               PetscCall(PetscSectionGetOffset(section, p, &periodic_cone_offset));
689               const PetscInt *periodic_cone = &periodic_cone_arrays[periodic_cone_offset];
690               PetscCall(TranslateConeP2D(periodic_cone, periodic_cone_size, periodic2donor, p2d_count, p2d_cone));
691 
692               // Find the donor cone that matches the periodic point's cone
693               PetscInt  donor_cone_offset = 0, donor_point = -1, *donor_cone = NULL;
694               PetscBool cone_matches = PETSC_FALSE;
695               while (donor_cone_offset < cones_size - num_verts) {
696                 PetscInt donor_cone_size = donor_cone_array[donor_cone_offset];
697                 donor_point              = donor_cone_array[donor_cone_offset + 1];
698                 donor_cone               = &donor_cone_array[donor_cone_offset + 2];
699 
700                 if (donor_cone_size != periodic_cone_size) goto next_cone;
701                 for (PetscInt c = 0; c < periodic_cone_size; c++) {
702                   cone_matches = SearchIntArray(donor_cone[c], periodic_cone_size, p2d_cone);
703                   if (!cone_matches) goto next_cone;
704                 }
705                 // Save the found donor cone's point to the translation array. These will be used for higher depth points.
706                 // i.e. we save the edge translations for when we look for face cones
707                 periodic2donor[2 * p2d_count + 0] = periodic_point;
708                 periodic2donor[2 * p2d_count + 1] = donor_point;
709                 p2d_count++;
710                 break;
711 
712               next_cone:
713                 donor_cone_offset += donor_cone_size + 2;
714               }
715               PetscCheck(donor_cone_offset < cones_size - num_verts, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find donor cone equivalent to cone of periodic point %" PetscInt_FMT, periodic_point);
716 
717               { // Compare the donor cone with the translated periodic cone and reorient
718                 PetscInt       ornt;
719                 DMPolytopeType cell_type;
720                 PetscBool      found;
721                 PetscCall(DMPlexGetCellType(dm, periodic_point, &cell_type));
722                 PetscCall(DMPolytopeMatchOrientation(cell_type, donor_cone, p2d_cone, &ornt, &found));
723                 PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Could not find transformation between donor (%" PetscInt_FMT ") and periodic (%" PetscInt_FMT ") cone's", periodic_point, donor_point);
724                 if (debug_printing) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Reorienting point %" PetscInt_FMT " by %" PetscInt_FMT "\n", periodic_point, ornt));
725                 PetscCall(DMPlexOrientPointWithCorrections(dm, periodic_point, ornt));
726               }
727             }
728             PetscCall(ISRestoreIndices(is_arrays[d], &periodic_cone_arrays));
729             PetscCall(ISRestoreIndices(is_arrays[d + 1], &periodic_point_arrays));
730           }
731           PetscCall(DMRestoreWorkArray(dm, maxConeSize, MPIU_INT, &p2d_cone));
732           PetscCall(DMPlexRestoreConeRecursive(dm, face_is, &depth, &is_arrays, &section_arrays));
733         }
734 
735         PetscCall(DMPlexRestoreTransitiveClosure(dm, periodic_face, PETSC_TRUE, &cl_size, &closure));
736         cones_offset += cones_size;
737       }
738       PetscCall(DMRestoreWorkArray(dm, 2 * PetscPowInt(maxConeSize, dm_depth - 1), MPIU_INT, &periodic2donor));
739     }
740 
741     PetscCall(PetscFree2(leaf_donor_coords, leaf_donor_cones));
742     PetscCall(PetscFree2(face_vertices_size, face_cones_size));
743   }
744   PetscFunctionReturn(PETSC_SUCCESS);
745 }
746 
747 // Start with an SF for a positive depth (e.g., faces) and create a new SF for matched closure.
748 //
749 // Output Arguments:
750 //
751 // + closure_sf - augmented point SF (see `DMGetPointSF()`) that includes the faces and all points in its closure. This
752 //   can be used to create a global section and section SF.
753 // - is_points - array of index sets for just the points in the closure of `face_sf`. These may be used to apply an affine
754 //   transformation to periodic dofs; see DMPeriodicCoordinateSetUp_Internal().
755 //
756 static PetscErrorCode DMPlexCreateIsoperiodicPointSF_Private(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs, PetscSF *closure_sf, IS **is_points)
757 {
758   MPI_Comm           comm;
759   PetscMPIInt        rank;
760   PetscSF            point_sf;
761   PetscInt           nroots, nleaves;
762   const PetscInt    *filocal;
763   const PetscSFNode *firemote;
764 
765   PetscFunctionBegin;
766   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
767   PetscCallMPI(MPI_Comm_rank(comm, &rank));
768   PetscCall(DMGetPointSF(dm, &point_sf)); // Point SF has remote points
769   PetscCall(PetscMalloc1(num_face_sfs, is_points));
770 
771   PetscCall(DMPlexCorrectOrientationForIsoperiodic(dm));
772 
773   for (PetscInt f = 0; f < num_face_sfs; f++) {
774     PetscSF   face_sf = face_sfs[f];
775     PetscInt *cl_sizes;
776     PetscInt  fStart, fEnd, rootbuffersize, leafbuffersize;
777     PetscSF   sf_closure;
778     PetscBT   rootbt;
779 
780     PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
781     PetscCall(PetscMalloc1(fEnd - fStart, &cl_sizes));
782     for (PetscInt f = fStart, index = 0; f < fEnd; f++, index++) {
783       PetscInt cl_size, *closure = NULL;
784       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &cl_size, &closure));
785       cl_sizes[index] = cl_size - 1;
786       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &cl_size, &closure));
787     }
788 
789     PetscCall(CreateDonorToPeriodicSF(dm, face_sf, fStart, fEnd, cl_sizes, &rootbuffersize, &leafbuffersize, &rootbt, &sf_closure));
790     PetscCall(PetscFree(cl_sizes));
791     PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, &firemote));
792 
793     PetscSFNode *leaf_donor_closure;
794     { // Pack root buffer with owner for every point in the root cones
795       PetscSFNode       *donor_closure;
796       const PetscInt    *pilocal;
797       const PetscSFNode *piremote;
798       PetscInt           npoints;
799 
800       PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, &pilocal, &piremote));
801       PetscCall(PetscCalloc1(rootbuffersize, &donor_closure));
802       for (PetscInt p = 0, root_offset = 0; p < nroots; p++) {
803         if (!PetscBTLookup(rootbt, p)) continue;
804         PetscInt cl_size, *closure = NULL;
805         PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
806         for (PetscInt j = 1; j < cl_size; j++) {
807           PetscInt c = closure[2 * j];
808           if (pilocal) {
809             PetscInt found = -1;
810             if (npoints > 0) PetscCall(PetscFindInt(c, npoints, pilocal, &found));
811             if (found >= 0) {
812               donor_closure[root_offset++] = piremote[found];
813               continue;
814             }
815           }
816           // we own c
817           donor_closure[root_offset].rank  = rank;
818           donor_closure[root_offset].index = c;
819           root_offset++;
820         }
821         PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
822       }
823 
824       PetscCall(PetscMalloc1(leafbuffersize, &leaf_donor_closure));
825       PetscCall(PetscSFBcastBegin(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE));
826       PetscCall(PetscSFBcastEnd(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE));
827       PetscCall(PetscSFDestroy(&sf_closure));
828       PetscCall(PetscFree(donor_closure));
829     }
830 
831     PetscSFNode *new_iremote;
832     PetscCall(PetscCalloc1(nroots, &new_iremote));
833     for (PetscInt i = 0; i < nroots; i++) new_iremote[i].rank = -1;
834     // Walk leaves and match vertices
835     for (PetscInt i = 0, leaf_offset = 0; i < nleaves; i++) {
836       PetscInt  point   = filocal[i], cl_size;
837       PetscInt *closure = NULL;
838       PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
839       for (PetscInt j = 1; j < cl_size; j++) {
840         PetscInt    c  = closure[2 * j];
841         PetscSFNode lc = leaf_donor_closure[leaf_offset];
842         // printf("[%d] face %d.%d: %d ?-- (%d,%d)\n", rank, point, j, c, lc.rank, lc.index);
843         if (new_iremote[c].rank == -1) {
844           new_iremote[c] = lc;
845         } else PetscCheck(new_iremote[c].rank == lc.rank && new_iremote[c].index == lc.index, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Mismatched cone ordering between faces");
846         leaf_offset++;
847       }
848       PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
849     }
850     PetscCall(PetscFree(leaf_donor_closure));
851 
852     // Include face points in closure SF
853     for (PetscInt i = 0; i < nleaves; i++) new_iremote[filocal[i]] = firemote[i];
854     // consolidate leaves
855     PetscInt *leafdata;
856     PetscCall(PetscMalloc1(nroots, &leafdata));
857     PetscInt num_new_leaves = 0;
858     for (PetscInt i = 0; i < nroots; i++) {
859       if (new_iremote[i].rank == -1) continue;
860       new_iremote[num_new_leaves] = new_iremote[i];
861       leafdata[num_new_leaves]    = i;
862       num_new_leaves++;
863     }
864     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, num_new_leaves, leafdata, PETSC_COPY_VALUES, &(*is_points)[f]));
865 
866     PetscSF csf;
867     PetscCall(PetscSFCreate(comm, &csf));
868     PetscCall(PetscSFSetGraph(csf, nroots, num_new_leaves, leafdata, PETSC_COPY_VALUES, new_iremote, PETSC_COPY_VALUES));
869     PetscCall(PetscFree(new_iremote)); // copy and delete because new_iremote is longer than it needs to be
870     PetscCall(PetscFree(leafdata));
871     PetscCall(PetscBTDestroy(&rootbt));
872 
873     PetscInt npoints;
874     PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, NULL, NULL));
875     if (npoints < 0) { // empty point_sf
876       *closure_sf = csf;
877     } else {
878       PetscCall(PetscSFMerge(point_sf, csf, closure_sf));
879       PetscCall(PetscSFDestroy(&csf));
880     }
881     if (f > 0) PetscCall(PetscSFDestroy(&point_sf)); // Only destroy if point_sf is from previous calls to PetscSFMerge
882     point_sf = *closure_sf;                          // Use combined point + isoperiodic SF to define point ownership for further face_sf
883   }
884   PetscCall(PetscObjectSetName((PetscObject)*closure_sf, "Composed Periodic Points"));
885   PetscFunctionReturn(PETSC_SUCCESS);
886 }
887 
888 static PetscErrorCode DMGetIsoperiodicPointSF_Plex(DM dm, PetscSF *sf)
889 {
890   DM_Plex *plex = (DM_Plex *)dm->data;
891 
892   PetscFunctionBegin;
893   if (!plex->periodic.composed_sf) PetscCall(DMPlexCreateIsoperiodicPointSF_Private(dm, plex->periodic.num_face_sfs, plex->periodic.face_sfs, &plex->periodic.composed_sf, &plex->periodic.periodic_points));
894   if (sf) *sf = plex->periodic.composed_sf;
895   PetscFunctionReturn(PETSC_SUCCESS);
896 }
897 
898 PetscErrorCode DMPlexMigrateIsoperiodicFaceSF_Internal(DM old_dm, DM dm, PetscSF sf_migration)
899 {
900   DM_Plex    *plex = (DM_Plex *)old_dm->data;
901   PetscSF     sf_point, *new_face_sfs;
902   PetscMPIInt rank;
903 
904   PetscFunctionBegin;
905   if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
906   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
907   PetscCall(DMGetPointSF(dm, &sf_point));
908   PetscCall(PetscMalloc1(plex->periodic.num_face_sfs, &new_face_sfs));
909 
910   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) {
911     PetscInt           old_npoints, new_npoints, old_nleaf, new_nleaf, point_nleaf;
912     PetscSFNode       *new_leafdata, *rootdata, *leafdata;
913     const PetscInt    *old_local, *point_local;
914     const PetscSFNode *old_remote, *point_remote;
915 
916     PetscCall(PetscSFGetGraph(plex->periodic.face_sfs[f], &old_npoints, &old_nleaf, &old_local, &old_remote));
917     PetscCall(PetscSFGetGraph(sf_migration, NULL, &new_nleaf, NULL, NULL));
918     PetscCall(PetscSFGetGraph(sf_point, &new_npoints, &point_nleaf, &point_local, &point_remote));
919     PetscAssert(new_nleaf == new_npoints, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected migration leaf space to match new point root space");
920     PetscCall(PetscMalloc3(old_npoints, &rootdata, old_npoints, &leafdata, new_npoints, &new_leafdata));
921 
922     // Fill new_leafdata with new owners of all points
923     for (PetscInt i = 0; i < new_npoints; i++) {
924       new_leafdata[i].rank  = rank;
925       new_leafdata[i].index = i;
926     }
927     for (PetscInt i = 0; i < point_nleaf; i++) {
928       PetscInt j      = point_local[i];
929       new_leafdata[j] = point_remote[i];
930     }
931     // REPLACE is okay because every leaf agrees about the new owners
932     PetscCall(PetscSFReduceBegin(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE));
933     PetscCall(PetscSFReduceEnd(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE));
934     // rootdata now contains the new owners
935 
936     // Send to leaves of old space
937     for (PetscInt i = 0; i < old_npoints; i++) {
938       leafdata[i].rank  = -1;
939       leafdata[i].index = -1;
940     }
941     PetscCall(PetscSFBcastBegin(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE));
942     PetscCall(PetscSFBcastEnd(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE));
943 
944     // Send to new leaf space
945     PetscCall(PetscSFBcastBegin(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE));
946     PetscCall(PetscSFBcastEnd(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE));
947 
948     PetscInt     nface = 0, *new_local;
949     PetscSFNode *new_remote;
950     for (PetscInt i = 0; i < new_npoints; i++) nface += (new_leafdata[i].rank >= 0);
951     PetscCall(PetscMalloc1(nface, &new_local));
952     PetscCall(PetscMalloc1(nface, &new_remote));
953     nface = 0;
954     for (PetscInt i = 0; i < new_npoints; i++) {
955       if (new_leafdata[i].rank == -1) continue;
956       new_local[nface]  = i;
957       new_remote[nface] = new_leafdata[i];
958       nface++;
959     }
960     PetscCall(PetscFree3(rootdata, leafdata, new_leafdata));
961     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &new_face_sfs[f]));
962     PetscCall(PetscSFSetGraph(new_face_sfs[f], new_npoints, nface, new_local, PETSC_OWN_POINTER, new_remote, PETSC_OWN_POINTER));
963     {
964       char new_face_sf_name[PETSC_MAX_PATH_LEN];
965       PetscCall(PetscSNPrintf(new_face_sf_name, sizeof new_face_sf_name, "Migrated Isoperiodic Faces #%" PetscInt_FMT, f));
966       PetscCall(PetscObjectSetName((PetscObject)new_face_sfs[f], new_face_sf_name));
967     }
968   }
969 
970   PetscCall(DMPlexSetIsoperiodicFaceSF(dm, plex->periodic.num_face_sfs, new_face_sfs));
971   PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, plex->periodic.num_face_sfs, (PetscScalar *)plex->periodic.transform));
972   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) PetscCall(PetscSFDestroy(&new_face_sfs[f]));
973   PetscCall(PetscFree(new_face_sfs));
974   PetscFunctionReturn(PETSC_SUCCESS);
975 }
976 
977 PetscErrorCode DMPeriodicCoordinateSetUp_Internal(DM dm)
978 {
979   DM_Plex   *plex = (DM_Plex *)dm->data;
980   PetscCount count;
981   IS         isdof;
982   PetscInt   dim;
983 
984   PetscFunctionBegin;
985   if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
986   PetscCheck(plex->periodic.periodic_points, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Isoperiodic PointSF must be created before this function is called");
987 
988   PetscCall(DMGetCoordinateDim(dm, &dim));
989   dm->periodic.num_affines = plex->periodic.num_face_sfs;
990   PetscCall(PetscFree2(dm->periodic.affine_to_local, dm->periodic.affine));
991   PetscCall(PetscMalloc2(dm->periodic.num_affines, &dm->periodic.affine_to_local, dm->periodic.num_affines, &dm->periodic.affine));
992 
993   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) {
994     PetscInt        npoints, vsize, isize;
995     const PetscInt *points;
996     IS              is = plex->periodic.periodic_points[f];
997     PetscSegBuffer  seg;
998     PetscSection    section;
999     PetscInt       *ind;
1000     Vec             L, P;
1001     VecType         vec_type;
1002     VecScatter      scatter;
1003     PetscScalar    *x;
1004 
1005     PetscCall(DMGetLocalSection(dm, &section));
1006     PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 32, &seg));
1007     PetscCall(ISGetSize(is, &npoints));
1008     PetscCall(ISGetIndices(is, &points));
1009     for (PetscInt i = 0; i < npoints; i++) {
1010       PetscInt point = points[i], off, dof;
1011 
1012       PetscCall(PetscSectionGetOffset(section, point, &off));
1013       PetscCall(PetscSectionGetDof(section, point, &dof));
1014       PetscAssert(dof % dim == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected dof %" PetscInt_FMT " not divisible by dimension %" PetscInt_FMT, dof, dim);
1015       for (PetscInt j = 0; j < dof / dim; j++) {
1016         PetscInt *slot;
1017 
1018         PetscCall(PetscSegBufferGetInts(seg, 1, &slot));
1019         *slot = off / dim + j;
1020       }
1021     }
1022     PetscCall(PetscSegBufferGetSize(seg, &count));
1023     PetscCall(PetscSegBufferExtractAlloc(seg, &ind));
1024     PetscCall(PetscSegBufferDestroy(&seg));
1025     PetscCall(PetscIntCast(count, &isize));
1026     PetscCall(ISCreateBlock(PETSC_COMM_SELF, dim, isize, ind, PETSC_OWN_POINTER, &isdof));
1027 
1028     PetscCall(PetscIntCast(count * dim, &vsize));
1029     PetscCall(DMGetLocalVector(dm, &L));
1030     PetscCall(VecCreate(PETSC_COMM_SELF, &P));
1031     PetscCall(VecSetSizes(P, vsize, vsize));
1032     PetscCall(VecGetType(L, &vec_type));
1033     PetscCall(VecSetType(P, vec_type));
1034     PetscCall(VecScatterCreate(P, NULL, L, isdof, &scatter));
1035     PetscCall(DMRestoreLocalVector(dm, &L));
1036     PetscCall(ISDestroy(&isdof));
1037 
1038     PetscCall(VecGetArrayWrite(P, &x));
1039     for (PetscCount i = 0; i < count; i++) {
1040       for (PetscInt j = 0; j < dim; j++) x[i * dim + j] = plex->periodic.transform[f][j][3];
1041     }
1042     PetscCall(VecRestoreArrayWrite(P, &x));
1043 
1044     dm->periodic.affine_to_local[f] = scatter;
1045     dm->periodic.affine[f]          = P;
1046   }
1047   PetscCall(DMGlobalToLocalHookAdd(dm, NULL, DMCoordAddPeriodicOffsets_Private, NULL));
1048   PetscFunctionReturn(PETSC_SUCCESS);
1049 }
1050 
1051 PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Internal(DM dm, PetscInt dim, const PetscInt faces[], const PetscReal lower[], const PetscReal upper[], const DMBoundaryType periodicity[], PetscBool interpolate)
1052 {
1053   PetscInt  eextent[3] = {1, 1, 1}, vextent[3] = {1, 1, 1};
1054   const Ijk closure_1[] = {
1055     {0, 0, 0},
1056     {1, 0, 0},
1057   };
1058   const Ijk closure_2[] = {
1059     {0, 0, 0},
1060     {1, 0, 0},
1061     {1, 1, 0},
1062     {0, 1, 0},
1063   };
1064   const Ijk closure_3[] = {
1065     {0, 0, 0},
1066     {0, 1, 0},
1067     {1, 1, 0},
1068     {1, 0, 0},
1069     {0, 0, 1},
1070     {1, 0, 1},
1071     {1, 1, 1},
1072     {0, 1, 1},
1073   };
1074   const Ijk *const closure_dim[] = {NULL, closure_1, closure_2, closure_3};
1075   // This must be kept consistent with DMPlexCreateCubeMesh_Internal
1076   const PetscInt        face_marker_1[]   = {1, 2};
1077   const PetscInt        face_marker_2[]   = {4, 2, 1, 3};
1078   const PetscInt        face_marker_3[]   = {6, 5, 3, 4, 1, 2};
1079   const PetscInt *const face_marker_dim[] = {NULL, face_marker_1, face_marker_2, face_marker_3};
1080   // Orient faces so the normal is in the positive axis and the first vertex is the one closest to zero.
1081   // These orientations can be determined by examining cones of a reference quad and hex element.
1082   const PetscInt        face_orient_1[]   = {0, 0};
1083   const PetscInt        face_orient_2[]   = {-1, 0, 0, -1};
1084   const PetscInt        face_orient_3[]   = {-2, 0, -2, 1, -2, 0};
1085   const PetscInt *const face_orient_dim[] = {NULL, face_orient_1, face_orient_2, face_orient_3};
1086 
1087   PetscFunctionBegin;
1088   PetscCall(PetscLogEventBegin(DMPLEX_CreateBoxSFC, dm, 0, 0, 0));
1089   PetscAssertPointer(dm, 1);
1090   PetscValidLogicalCollectiveInt(dm, dim, 2);
1091   PetscCall(DMSetDimension(dm, dim));
1092   PetscMPIInt rank, size;
1093   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size));
1094   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
1095   for (PetscInt i = 0; i < dim; i++) {
1096     eextent[i] = faces[i];
1097     vextent[i] = faces[i] + 1;
1098   }
1099   ZLayout layout;
1100   PetscCall(ZLayoutCreate(size, eextent, vextent, &layout));
1101   PetscZSet vset; // set of all vertices in the closure of the owned elements
1102   PetscCall(PetscZSetCreate(&vset));
1103   PetscInt local_elems = 0;
1104   for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
1105     Ijk loc = ZCodeSplit(z);
1106     if (IjkActive(layout.vextent, loc)) PetscCall(PetscZSetAdd(vset, z));
1107     else {
1108       z += ZStepOct(z);
1109       continue;
1110     }
1111     if (IjkActive(layout.eextent, loc)) {
1112       local_elems++;
1113       // Add all neighboring vertices to set
1114       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
1115         Ijk   inc  = closure_dim[dim][n];
1116         Ijk   nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
1117         ZCode v    = ZEncode(nloc);
1118         PetscCall(PetscZSetAdd(vset, v));
1119       }
1120     }
1121   }
1122   PetscInt local_verts, off = 0;
1123   ZCode   *vert_z;
1124   PetscCall(PetscZSetGetSize(vset, &local_verts));
1125   PetscCall(PetscMalloc1(local_verts, &vert_z));
1126   PetscCall(PetscZSetGetElems(vset, &off, vert_z));
1127   PetscCall(PetscZSetDestroy(&vset));
1128   // ZCode is unsigned for bitwise convenience, but highest bit should never be set, so can interpret as signed
1129   PetscCall(PetscSortInt64(local_verts, (PetscInt64 *)vert_z));
1130 
1131   PetscCall(DMPlexSetChart(dm, 0, local_elems + local_verts));
1132   for (PetscInt e = 0; e < local_elems; e++) PetscCall(DMPlexSetConeSize(dm, e, PetscPowInt(2, dim)));
1133   PetscCall(DMSetUp(dm));
1134   {
1135     PetscInt e = 0;
1136     for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
1137       Ijk loc = ZCodeSplit(z);
1138       if (!IjkActive(layout.eextent, loc)) {
1139         z += ZStepOct(z);
1140         continue;
1141       }
1142       PetscInt cone[8], orient[8] = {0};
1143       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
1144         Ijk      inc  = closure_dim[dim][n];
1145         Ijk      nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
1146         ZCode    v    = ZEncode(nloc);
1147         PetscInt ci   = ZCodeFind(v, local_verts, vert_z);
1148         PetscAssert(ci >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Did not find neighbor vertex in set");
1149         cone[n] = local_elems + ci;
1150       }
1151       PetscCall(DMPlexSetCone(dm, e, cone));
1152       PetscCall(DMPlexSetConeOrientation(dm, e, orient));
1153       e++;
1154     }
1155   }
1156 
1157   PetscCall(DMPlexSymmetrize(dm));
1158   PetscCall(DMPlexStratify(dm));
1159 
1160   { // Create point SF
1161     PetscSF sf;
1162     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &sf));
1163     PetscInt owned_verts = ZCodeFind(layout.zstarts[rank + 1], local_verts, vert_z);
1164     if (owned_verts < 0) owned_verts = -(owned_verts + 1); // We don't care whether the key was found
1165     PetscInt     num_ghosts = local_verts - owned_verts;   // Due to sorting, owned vertices always come first
1166     PetscInt    *local_ghosts;
1167     PetscSFNode *ghosts;
1168     PetscCall(PetscMalloc1(num_ghosts, &local_ghosts));
1169     PetscCall(PetscMalloc1(num_ghosts, &ghosts));
1170     for (PetscInt i = 0; i < num_ghosts;) {
1171       ZCode       z = vert_z[owned_verts + i];
1172       PetscMPIInt remote_rank, remote_count = 0;
1173 
1174       PetscCall(PetscMPIIntCast(ZCodeFind(z, size + 1, layout.zstarts), &remote_rank));
1175       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
1176       // We have a new remote rank; find all the ghost indices (which are contiguous in vert_z)
1177 
1178       // Count the elements on remote_rank
1179       PetscInt remote_elem = ZLayoutElementsOnRank(&layout, remote_rank);
1180 
1181       // Traverse vertices and make ghost links
1182       for (ZCode rz = layout.zstarts[remote_rank]; rz < layout.zstarts[remote_rank + 1]; rz++) {
1183         Ijk loc = ZCodeSplit(rz);
1184         if (rz == z) {
1185           local_ghosts[i] = local_elems + owned_verts + i;
1186           ghosts[i].rank  = remote_rank;
1187           ghosts[i].index = remote_elem + remote_count;
1188           i++;
1189           if (i == num_ghosts) break;
1190           z = vert_z[owned_verts + i];
1191         }
1192         if (IjkActive(layout.vextent, loc)) remote_count++;
1193         else rz += ZStepOct(rz);
1194       }
1195     }
1196     PetscCall(PetscSFSetGraph(sf, local_elems + local_verts, num_ghosts, local_ghosts, PETSC_OWN_POINTER, ghosts, PETSC_OWN_POINTER));
1197     PetscCall(PetscObjectSetName((PetscObject)sf, "SFC Point SF"));
1198     PetscCall(DMSetPointSF(dm, sf));
1199     PetscCall(PetscSFDestroy(&sf));
1200   }
1201   {
1202     Vec          coordinates;
1203     PetscScalar *coords;
1204     PetscSection coord_section;
1205     PetscInt     coord_size;
1206     PetscCall(DMGetCoordinateSection(dm, &coord_section));
1207     PetscCall(PetscSectionSetNumFields(coord_section, 1));
1208     PetscCall(PetscSectionSetFieldComponents(coord_section, 0, dim));
1209     PetscCall(PetscSectionSetChart(coord_section, local_elems, local_elems + local_verts));
1210     for (PetscInt v = 0; v < local_verts; v++) {
1211       PetscInt point = local_elems + v;
1212       PetscCall(PetscSectionSetDof(coord_section, point, dim));
1213       PetscCall(PetscSectionSetFieldDof(coord_section, point, 0, dim));
1214     }
1215     PetscCall(PetscSectionSetUp(coord_section));
1216     PetscCall(PetscSectionGetStorageSize(coord_section, &coord_size));
1217     PetscCall(VecCreate(PETSC_COMM_SELF, &coordinates));
1218     PetscCall(PetscObjectSetName((PetscObject)coordinates, "coordinates"));
1219     PetscCall(VecSetSizes(coordinates, coord_size, PETSC_DETERMINE));
1220     PetscCall(VecSetBlockSize(coordinates, dim));
1221     PetscCall(VecSetType(coordinates, VECSTANDARD));
1222     PetscCall(VecGetArray(coordinates, &coords));
1223     for (PetscInt v = 0; v < local_verts; v++) {
1224       Ijk loc             = ZCodeSplit(vert_z[v]);
1225       coords[v * dim + 0] = lower[0] + loc.i * (upper[0] - lower[0]) / layout.eextent.i;
1226       if (dim > 1) coords[v * dim + 1] = lower[1] + loc.j * (upper[1] - lower[1]) / layout.eextent.j;
1227       if (dim > 2) coords[v * dim + 2] = lower[2] + loc.k * (upper[2] - lower[2]) / layout.eextent.k;
1228     }
1229     PetscCall(VecRestoreArray(coordinates, &coords));
1230     PetscCall(DMSetCoordinatesLocal(dm, coordinates));
1231     PetscCall(VecDestroy(&coordinates));
1232   }
1233   if (interpolate) {
1234     PetscCall(DMPlexInterpolateInPlace_Internal(dm));
1235 
1236     DMLabel label;
1237     PetscCall(DMCreateLabel(dm, "Face Sets"));
1238     PetscCall(DMGetLabel(dm, "Face Sets", &label));
1239     PetscSegBuffer per_faces[3], donor_face_closure[3], my_donor_faces[3];
1240     for (PetscInt i = 0; i < 3; i++) {
1241       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &per_faces[i]));
1242       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &my_donor_faces[i]));
1243       PetscCall(PetscSegBufferCreate(sizeof(ZCode), 64 * PetscPowInt(2, dim), &donor_face_closure[i]));
1244     }
1245     PetscInt fStart, fEnd, vStart, vEnd;
1246     PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
1247     PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
1248     for (PetscInt f = fStart; f < fEnd; f++) {
1249       PetscInt npoints, *points = NULL, num_fverts = 0, fverts[8];
1250       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
1251       PetscInt bc_count[6] = {0};
1252       for (PetscInt i = 0; i < npoints; i++) {
1253         PetscInt p = points[2 * i];
1254         if (!IsPointInsideStratum(p, vStart, vEnd)) continue;
1255         fverts[num_fverts++] = p;
1256         Ijk loc              = ZCodeSplit(vert_z[p - vStart]);
1257         // Convention here matches DMPlexCreateCubeMesh_Internal
1258         bc_count[0] += loc.i == 0;
1259         bc_count[1] += loc.i == layout.vextent.i - 1;
1260         bc_count[2] += loc.j == 0;
1261         bc_count[3] += loc.j == layout.vextent.j - 1;
1262         bc_count[4] += loc.k == 0;
1263         bc_count[5] += loc.k == layout.vextent.k - 1;
1264       }
1265       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
1266       for (PetscInt bc = 0, bc_match = 0; bc < 2 * dim; bc++) {
1267         if (bc_count[bc] == PetscPowInt(2, dim - 1)) {
1268           PetscCall(DMPlexOrientPoint(dm, f, face_orient_dim[dim][bc]));
1269           if (periodicity[bc / 2] == DM_BOUNDARY_PERIODIC) {
1270             PetscInt *put;
1271             if (bc % 2 == 0) { // donor face; no label
1272               PetscCall(PetscSegBufferGet(my_donor_faces[bc / 2], 1, &put));
1273               *put = f;
1274             } else { // periodic face
1275               PetscCall(PetscSegBufferGet(per_faces[bc / 2], 1, &put));
1276               *put = f;
1277               ZCode *zput;
1278               PetscCall(PetscSegBufferGet(donor_face_closure[bc / 2], num_fverts, &zput));
1279               for (PetscInt i = 0; i < num_fverts; i++) {
1280                 Ijk loc = ZCodeSplit(vert_z[fverts[i] - vStart]);
1281                 switch (bc / 2) {
1282                 case 0:
1283                   loc.i = 0;
1284                   break;
1285                 case 1:
1286                   loc.j = 0;
1287                   break;
1288                 case 2:
1289                   loc.k = 0;
1290                   break;
1291                 }
1292                 *zput++ = ZEncode(loc);
1293               }
1294             }
1295             continue;
1296           }
1297           PetscAssert(bc_match == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Face matches multiple face sets");
1298           PetscCall(DMLabelSetValue(label, f, face_marker_dim[dim][bc]));
1299           bc_match++;
1300         }
1301       }
1302     }
1303     // Ensure that the Coordinate DM has our new boundary labels
1304     DM cdm;
1305     PetscCall(DMGetCoordinateDM(dm, &cdm));
1306     PetscCall(DMCopyLabels(dm, cdm, PETSC_COPY_VALUES, PETSC_FALSE, DM_COPY_LABELS_FAIL));
1307     if (periodicity[0] == DM_BOUNDARY_PERIODIC || (dim > 1 && periodicity[1] == DM_BOUNDARY_PERIODIC) || (dim > 2 && periodicity[2] == DM_BOUNDARY_PERIODIC)) {
1308       PetscCall(DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(dm, &layout, vert_z, per_faces, lower, upper, periodicity, donor_face_closure, my_donor_faces));
1309     }
1310     for (PetscInt i = 0; i < 3; i++) {
1311       PetscCall(PetscSegBufferDestroy(&per_faces[i]));
1312       PetscCall(PetscSegBufferDestroy(&donor_face_closure[i]));
1313       PetscCall(PetscSegBufferDestroy(&my_donor_faces[i]));
1314     }
1315   }
1316   PetscCall(PetscFree(layout.zstarts));
1317   PetscCall(PetscFree(vert_z));
1318   PetscCall(PetscLogEventEnd(DMPLEX_CreateBoxSFC, dm, 0, 0, 0));
1319   PetscFunctionReturn(PETSC_SUCCESS);
1320 }
1321 
1322 /*@
1323   DMPlexSetIsoperiodicFaceSF - Express periodicity from an existing mesh
1324 
1325   Logically Collective
1326 
1327   Input Parameters:
1328 + dm           - The `DMPLEX` on which to set periodicity
1329 . num_face_sfs - Number of `PetscSF`s in `face_sfs`
1330 - face_sfs     - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.
1331 
1332   Level: advanced
1333 
1334   Note:
1335   One can use `-dm_plex_shape zbox` to use this mode of periodicity, wherein the periodic points are distinct both globally
1336   and locally, but are paired when creating a global dof space.
1337 
1338 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexGetIsoperiodicFaceSF()`
1339 @*/
1340 PetscErrorCode DMPlexSetIsoperiodicFaceSF(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs)
1341 {
1342   DM_Plex *plex = (DM_Plex *)dm->data;
1343 
1344   PetscFunctionBegin;
1345   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1346   if (num_face_sfs) PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex));
1347   else PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", NULL));
1348   if (num_face_sfs == plex->periodic.num_face_sfs && (num_face_sfs == 0 || face_sfs == plex->periodic.face_sfs)) PetscFunctionReturn(PETSC_SUCCESS);
1349   PetscCall(DMSetGlobalSection(dm, NULL));
1350 
1351   for (PetscInt i = 0; i < num_face_sfs; i++) PetscCall(PetscObjectReference((PetscObject)face_sfs[i]));
1352 
1353   if (plex->periodic.num_face_sfs > 0) {
1354     for (PetscInt i = 0; i < plex->periodic.num_face_sfs; i++) PetscCall(PetscSFDestroy(&plex->periodic.face_sfs[i]));
1355     PetscCall(PetscFree(plex->periodic.face_sfs));
1356   }
1357 
1358   plex->periodic.num_face_sfs = num_face_sfs;
1359   PetscCall(PetscCalloc1(num_face_sfs, &plex->periodic.face_sfs));
1360   for (PetscInt i = 0; i < num_face_sfs; i++) plex->periodic.face_sfs[i] = face_sfs[i];
1361 
1362   DM cdm = dm->coordinates[0].dm; // Can't DMGetCoordinateDM because it automatically creates one
1363   if (cdm) {
1364     PetscCall(DMPlexSetIsoperiodicFaceSF(cdm, num_face_sfs, face_sfs));
1365     if (face_sfs) cdm->periodic.setup = DMPeriodicCoordinateSetUp_Internal;
1366   }
1367   PetscFunctionReturn(PETSC_SUCCESS);
1368 }
1369 
1370 /*@C
1371   DMPlexGetIsoperiodicFaceSF - Obtain periodicity for a mesh
1372 
1373   Logically Collective
1374 
1375   Input Parameter:
1376 . dm - The `DMPLEX` for which to obtain periodic relation
1377 
1378   Output Parameters:
1379 + num_face_sfs - Number of `PetscSF`s in the array
1380 - face_sfs     - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.
1381 
1382   Level: advanced
1383 
1384 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()`
1385 @*/
1386 PetscErrorCode DMPlexGetIsoperiodicFaceSF(DM dm, PetscInt *num_face_sfs, const PetscSF **face_sfs)
1387 {
1388   DM_Plex *plex = (DM_Plex *)dm->data;
1389 
1390   PetscFunctionBegin;
1391   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1392   *face_sfs     = plex->periodic.face_sfs;
1393   *num_face_sfs = plex->periodic.num_face_sfs;
1394   PetscFunctionReturn(PETSC_SUCCESS);
1395 }
1396 
1397 /*@C
1398   DMPlexSetIsoperiodicFaceTransform - set geometric transform from donor to periodic points
1399 
1400   Logically Collective
1401 
1402   Input Parameters:
1403 + dm - `DMPLEX` that has been configured with `DMPlexSetIsoperiodicFaceSF()`
1404 . n  - Number of transforms in array
1405 - t  - Array of 4x4 affine transformation basis.
1406 
1407   Level: advanced
1408 
1409   Notes:
1410   Affine transforms are 4x4 matrices in which the leading 3x3 block expresses a rotation (or identity for no rotation),
1411   the last column contains a translation vector, and the bottom row is all zero except the last entry, which must always
1412   be 1. This representation is common in geometric modeling and allows affine transformations to be composed using
1413   simple matrix multiplication.
1414 
1415   Although the interface accepts a general affine transform, only affine translation is supported at present.
1416 
1417   Developer Notes:
1418   This interface should be replaced by making BasisTransform public, expanding it to support affine representations, and
1419   adding GPU implementations to apply the G2L/L2G transforms.
1420 
1421 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()`
1422 @*/
1423 PetscErrorCode DMPlexSetIsoperiodicFaceTransform(DM dm, PetscInt n, const PetscScalar t[])
1424 {
1425   DM_Plex *plex = (DM_Plex *)dm->data;
1426 
1427   PetscFunctionBegin;
1428   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1429   PetscCheck(n == plex->periodic.num_face_sfs, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Number of transforms (%" PetscInt_FMT ") must equal number of isoperiodc face SFs (%" PetscInt_FMT ")", n, plex->periodic.num_face_sfs);
1430 
1431   PetscCall(PetscFree(plex->periodic.transform));
1432   PetscCall(PetscMalloc1(n, &plex->periodic.transform));
1433   for (PetscInt i = 0; i < n; i++) {
1434     for (PetscInt j = 0; j < 4; j++) {
1435       for (PetscInt k = 0; k < 4; k++) {
1436         PetscCheck(j != k || t[i * 16 + j * 4 + k] == 1., PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Rotated transforms not supported");
1437         plex->periodic.transform[i][j][k] = t[i * 16 + j * 4 + k];
1438       }
1439     }
1440   }
1441   PetscFunctionReturn(PETSC_SUCCESS);
1442 }
1443