xref: /petsc/src/dm/impls/plex/plexsfc.c (revision 42e5ec60e3f6d3f21d92afbe12a337913bc9ad72)
1 #include <petsc/private/dmpleximpl.h> /*I   "petscdmplex.h"   I*/
2 #include <petscsf.h>
3 #include <petsc/private/hashset.h>
4 
5 typedef uint64_t ZCode;
6 
7 PETSC_HASH_SET(ZSet, ZCode, PetscHash_UInt64, PetscHashEqual)
8 
9 typedef struct {
10   PetscInt i, j, k;
11 } Ijk;
12 
13 typedef struct {
14   Ijk         eextent;
15   Ijk         vextent;
16   PetscMPIInt comm_size;
17   ZCode      *zstarts;
18 } ZLayout;
19 
20 // ***** Overview of ZCode *******
21 // The SFC uses integer indexing for each dimension and encodes them into a single integer by interleaving the bits of each index.
22 // This is known as Morton encoding, and is refered to as ZCode in this code.
23 // So for index i having bits [i2,i1,i0], and similar for indexes j and k, the ZCode (Morton number) would be:
24 //    [k2,j2,i2,k1,j1,i1,k0,j0,i0]
25 // This encoding allows for easier traversal of the SFC structure (see https://en.wikipedia.org/wiki/Z-order_curve and `ZStepOct()`).
26 // `ZEncode()` is used to go from indices to ZCode, while `ZCodeSplit()` goes from ZCode back to indices.
27 
28 // Decodes the leading interleaved index from a ZCode
29 // e.g. [k2,j2,i2,k1,j1,i1,k0,j0,i0] -> [i2,i1,i0]
30 // Magic numbers taken from https://stackoverflow.com/a/18528775/7564988 (translated to octal)
31 static unsigned ZCodeSplit1(ZCode z)
32 {
33   z &= 0111111111111111111111;
34   z = (z | z >> 2) & 0103030303030303030303;
35   z = (z | z >> 4) & 0100170017001700170017;
36   z = (z | z >> 8) & 0370000037700000377;
37   z = (z | z >> 16) & 0370000000000177777;
38   z = (z | z >> 32) & 07777777;
39   return (unsigned)z;
40 }
41 
42 // Encodes the leading interleaved index from a ZCode
43 // e.g. [i2,i1,i0] -> [0,0,i2,0,0,i1,0,0,i0]
44 static ZCode ZEncode1(unsigned t)
45 {
46   ZCode z = t;
47   z &= 07777777;
48   z = (z | z << 32) & 0370000000000177777;
49   z = (z | z << 16) & 0370000037700000377;
50   z = (z | z << 8) & 0100170017001700170017;
51   z = (z | z << 4) & 0103030303030303030303;
52   z = (z | z << 2) & 0111111111111111111111;
53   return z;
54 }
55 
56 // Decodes i j k indices from a ZCode.
57 // Uses `ZCodeSplit1()` by shifting ZCode so that the leading index is the desired one to decode
58 static Ijk ZCodeSplit(ZCode z)
59 {
60   Ijk c;
61   c.i = ZCodeSplit1(z >> 2);
62   c.j = ZCodeSplit1(z >> 1);
63   c.k = ZCodeSplit1(z >> 0);
64   return c;
65 }
66 
67 // Encodes i j k indices to a ZCode.
68 // Uses `ZCodeEncode1()` by shifting resulting ZCode to the appropriate bit placement
69 static ZCode ZEncode(Ijk c)
70 {
71   ZCode z = (ZEncode1((unsigned int)c.i) << 2) | (ZEncode1((unsigned int)c.j) << 1) | ZEncode1((unsigned int)c.k);
72   return z;
73 }
74 
75 static PetscBool IjkActive(Ijk extent, Ijk l)
76 {
77   if (l.i < extent.i && l.j < extent.j && l.k < extent.k) return PETSC_TRUE;
78   return PETSC_FALSE;
79 }
80 
81 // If z is not the base of an octet (last 3 bits 0), return 0.
82 //
83 // If z is the base of an octet, we recursively grow to the biggest structured octet. This is typically useful when a z
84 // is outside the domain and we wish to skip a (possibly recursively large) octet to find our next interesting point.
85 static ZCode ZStepOct(ZCode z)
86 {
87   if (PetscUnlikely(z == 0)) return 0; // Infinite loop below if z == 0
88   ZCode step = 07;
89   for (; (z & step) == 0; step = (step << 3) | 07) { }
90   return step >> 3;
91 }
92 
93 // Since element/vertex box extents are typically not equal powers of 2, Z codes that lie within the domain are not contiguous.
94 static PetscErrorCode ZLayoutCreate(PetscMPIInt size, const PetscInt eextent[3], const PetscInt vextent[3], ZLayout *layout)
95 {
96   PetscFunctionBegin;
97   layout->eextent.i = eextent[0];
98   layout->eextent.j = eextent[1];
99   layout->eextent.k = eextent[2];
100   layout->vextent.i = vextent[0];
101   layout->vextent.j = vextent[1];
102   layout->vextent.k = vextent[2];
103   layout->comm_size = size;
104   layout->zstarts   = NULL;
105   PetscCall(PetscMalloc1(size + 1, &layout->zstarts));
106 
107   PetscInt total_elems = eextent[0] * eextent[1] * eextent[2];
108   ZCode    z           = 0;
109   layout->zstarts[0]   = 0;
110   // This loop traverses all vertices in the global domain, so is worth making fast. We use ZStepBound
111   for (PetscMPIInt r = 0; r < size; r++) {
112     PetscInt elems_needed = (total_elems / size) + (total_elems % size > r), count;
113     for (count = 0; count < elems_needed; z++) {
114       ZCode skip = ZStepOct(z); // optimistically attempt a longer step
115       for (ZCode s = skip;; s >>= 3) {
116         Ijk trial = ZCodeSplit(z + s);
117         if (IjkActive(layout->eextent, trial)) {
118           while (count + s + 1 > (ZCode)elems_needed) s >>= 3; // Shrink the octet
119           count += s + 1;
120           z += s;
121           break;
122         }
123         if (s == 0) { // the whole skip octet is inactive
124           z += skip;
125           break;
126         }
127       }
128     }
129     // Pick up any extra vertices in the Z ordering before the next rank's first owned element.
130     //
131     // This leads to poorly balanced vertices when eextent is a power of 2, since all the fringe vertices end up
132     // on the last rank. A possible solution is to balance the Z-order vertices independently from the cells, which will
133     // result in a lot of element closures being remote. We could finish marking boundary conditions, then do a round of
134     // vertex ownership smoothing (which would reorder and redistribute vertices without touching element distribution).
135     // Another would be to have an analytic ownership criteria for vertices in the fringe veextent - eextent. This would
136     // complicate the job of identifying an owner and its offset.
137     //
138     // The current recommended approach is to let `-dm_distribute 1` (default) resolve vertex ownership. This is
139     // *mandatory* with isoperiodicity (except in special cases) to remove standed vertices from local spaces. Here's
140     // the issue:
141     //
142     // Consider this partition on rank 0 (left) and rank 1.
143     //
144     //    4 --------  5 -- 14 --10 -- 21 --11
145     //                |          |          |
146     // 7 -- 16 --  8  |          |          |
147     // |           |  3 -------  7 -------  9
148     // |           |             |          |
149     // 4 --------  6 ------ 10   |          |
150     // |           |         |   6 -- 16 -- 8
151     // |           |         |
152     // 3 ---11---  5 --18--  9
153     //
154     // The periodic face SF looks like
155     // [0] Number of roots=21, leaves=1, remote ranks=1
156     // [0] 16 <- (0,11)
157     // [1] Number of roots=22, leaves=2, remote ranks=2
158     // [1] 14 <- (0,18)
159     // [1] 21 <- (1,16)
160     //
161     // In handling face (0,16), rank 0 learns that (0,7) and (0,8) map to (0,3) and (0,5) respectively, thus we won't use
162     // the point SF links to (1,4) and (1,5). Rank 1 learns about the periodic mapping of (1,5) while handling face
163     // (1,14), but never learns that vertex (1,4) has been mapped to (0,3) by face (0,16).
164     //
165     // We can relatively easily inform vertex (1,4) of this mapping, but it stays in rank 1's local space despite not
166     // being in the closure and thus not being contributed to. This would be mostly harmless except that some viewer
167     // routines expect all local points to be somehow significant. It is not easy to analytically remove the (1,4)
168     // vertex because the point SF and isoperiodic face SF would need to be updated to account for removal of the
169     // stranded vertices.
170     for (; z <= ZEncode(layout->vextent); z++) {
171       Ijk loc = ZCodeSplit(z);
172       if (IjkActive(layout->eextent, loc)) break;
173       z += ZStepOct(z);
174     }
175     layout->zstarts[r + 1] = z;
176   }
177   layout->zstarts[size] = ZEncode(layout->vextent);
178   PetscFunctionReturn(PETSC_SUCCESS);
179 }
180 
181 static PetscInt ZLayoutElementsOnRank(const ZLayout *layout, PetscMPIInt rank)
182 {
183   PetscInt remote_elem = 0;
184   for (ZCode rz = layout->zstarts[rank]; rz < layout->zstarts[rank + 1]; rz++) {
185     Ijk loc = ZCodeSplit(rz);
186     if (IjkActive(layout->eextent, loc)) remote_elem++;
187     else rz += ZStepOct(rz);
188   }
189   return remote_elem;
190 }
191 
192 static PetscInt ZCodeFind(ZCode key, PetscInt n, const ZCode X[])
193 {
194   PetscInt lo = 0, hi = n;
195 
196   if (n == 0) return -1;
197   while (hi - lo > 1) {
198     PetscInt mid = lo + (hi - lo) / 2;
199     if (key < X[mid]) hi = mid;
200     else lo = mid;
201   }
202   return key == X[lo] ? lo : -(lo + (key > X[lo]) + 1);
203 }
204 
205 static inline PetscBool IsPointInsideStratum(PetscInt point, PetscInt pStart, PetscInt pEnd)
206 {
207   return (point >= pStart && point < pEnd) ? PETSC_TRUE : PETSC_FALSE;
208 }
209 
210 static PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(DM dm, const ZLayout *layout, const ZCode *vert_z, PetscSegBuffer per_faces[3], const PetscReal *lower, const PetscReal *upper, const DMBoundaryType *periodicity, PetscSegBuffer donor_face_closure[3], PetscSegBuffer my_donor_faces[3])
211 {
212   MPI_Comm    comm;
213   PetscInt    dim, vStart, vEnd;
214   PetscMPIInt size;
215   PetscSF     face_sfs[3];
216   PetscScalar transforms[3][4][4] = {{{0}}};
217 
218   PetscFunctionBegin;
219   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
220   PetscCallMPI(MPI_Comm_size(comm, &size));
221   PetscCall(DMGetDimension(dm, &dim));
222   const PetscInt csize = PetscPowInt(2, dim - 1);
223   PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
224 
225   PetscInt num_directions = 0;
226   for (PetscInt direction = 0; direction < dim; direction++) {
227     PetscCount   num_faces;
228     PetscInt    *faces;
229     ZCode       *donor_verts, *donor_minz;
230     PetscSFNode *leaf;
231     PetscCount   num_multiroots = 0;
232     PetscInt     pStart, pEnd;
233     PetscBool    sorted;
234     PetscInt     inum_faces;
235 
236     if (periodicity[direction] != DM_BOUNDARY_PERIODIC) continue;
237     PetscCall(PetscSegBufferGetSize(per_faces[direction], &num_faces));
238     PetscCall(PetscSegBufferExtractInPlace(per_faces[direction], &faces));
239     PetscCall(PetscSegBufferExtractInPlace(donor_face_closure[direction], &donor_verts));
240     PetscCall(PetscMalloc1(num_faces, &donor_minz));
241     PetscCall(PetscMalloc1(num_faces, &leaf));
242     for (PetscCount i = 0; i < num_faces; i++) {
243       ZCode minz = donor_verts[i * csize];
244 
245       for (PetscInt j = 1; j < csize; j++) minz = PetscMin(minz, donor_verts[i * csize + j]);
246       donor_minz[i] = minz;
247     }
248     PetscCall(PetscIntCast(num_faces, &inum_faces));
249     PetscCall(PetscSortedInt64(inum_faces, (const PetscInt64 *)donor_minz, &sorted));
250     // If a donor vertex were chosen to broker multiple faces, we would have a logic error.
251     // Checking for sorting is a cheap check that there are no duplicates.
252     PetscCheck(sorted, PETSC_COMM_SELF, PETSC_ERR_PLIB, "minz not sorted; possible duplicates not checked");
253     for (PetscCount i = 0; i < num_faces;) {
254       ZCode       z = donor_minz[i];
255       PetscMPIInt remote_rank, remote_count = 0;
256 
257       PetscCall(PetscMPIIntCast(ZCodeFind(z, size + 1, layout->zstarts), &remote_rank));
258       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
259       // Process all the vertices on this rank
260       for (ZCode rz = layout->zstarts[remote_rank]; rz < layout->zstarts[remote_rank + 1]; rz++) {
261         Ijk loc = ZCodeSplit(rz);
262 
263         if (rz == z) {
264           leaf[i].rank  = remote_rank;
265           leaf[i].index = remote_count;
266           i++;
267           if (i == num_faces) break;
268           z = donor_minz[i];
269         }
270         if (IjkActive(layout->vextent, loc)) remote_count++;
271       }
272     }
273     PetscCall(PetscFree(donor_minz));
274     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &face_sfs[num_directions]));
275     PetscCall(PetscSFSetGraph(face_sfs[num_directions], vEnd - vStart, inum_faces, NULL, PETSC_USE_POINTER, leaf, PETSC_USE_POINTER));
276     const PetscInt *my_donor_degree;
277     PetscCall(PetscSFComputeDegreeBegin(face_sfs[num_directions], &my_donor_degree));
278     PetscCall(PetscSFComputeDegreeEnd(face_sfs[num_directions], &my_donor_degree));
279 
280     for (PetscInt i = 0; i < vEnd - vStart; i++) {
281       num_multiroots += my_donor_degree[i];
282       if (my_donor_degree[i] == 0) continue;
283       PetscAssert(my_donor_degree[i] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex has multiple faces");
284     }
285     PetscInt  *my_donors, *donor_indices, *my_donor_indices;
286     PetscCount num_my_donors;
287 
288     PetscCall(PetscSegBufferGetSize(my_donor_faces[direction], &num_my_donors));
289     PetscCheck(num_my_donors == num_multiroots, PETSC_COMM_SELF, PETSC_ERR_SUP, "Donor request (%" PetscCount_FMT ") does not match expected donors (%" PetscCount_FMT ")", num_my_donors, num_multiroots);
290     PetscCall(PetscSegBufferExtractInPlace(my_donor_faces[direction], &my_donors));
291     PetscCall(PetscMalloc1(vEnd - vStart, &my_donor_indices));
292     for (PetscCount i = 0; i < num_my_donors; i++) {
293       PetscInt f = my_donors[i];
294       PetscInt num_points, *points = NULL, minv = PETSC_INT_MAX;
295 
296       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
297       for (PetscInt j = 0; j < num_points; j++) {
298         PetscInt p = points[2 * j];
299         if (!IsPointInsideStratum(p, vStart, vEnd)) continue;
300         minv = PetscMin(minv, p);
301       }
302       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
303       PetscAssert(my_donor_degree[minv - vStart] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex not requested");
304       my_donor_indices[minv - vStart] = f;
305     }
306     PetscCall(PetscMalloc1(num_faces, &donor_indices));
307     PetscCall(PetscSFBcastBegin(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
308     PetscCall(PetscSFBcastEnd(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
309     PetscCall(PetscFree(my_donor_indices));
310     // Modify our leafs so they point to donor faces instead of donor minz. Additionally, give them indices as faces.
311     for (PetscCount i = 0; i < num_faces; i++) leaf[i].index = donor_indices[i];
312     PetscCall(PetscFree(donor_indices));
313     PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
314     PetscCall(PetscSFSetGraph(face_sfs[num_directions], pEnd - pStart, inum_faces, faces, PETSC_COPY_VALUES, leaf, PETSC_OWN_POINTER));
315     {
316       char face_sf_name[PETSC_MAX_PATH_LEN];
317       PetscCall(PetscSNPrintf(face_sf_name, sizeof face_sf_name, "Z-order Isoperiodic Faces #%" PetscInt_FMT, num_directions));
318       PetscCall(PetscObjectSetName((PetscObject)face_sfs[num_directions], face_sf_name));
319     }
320 
321     transforms[num_directions][0][0]         = 1;
322     transforms[num_directions][1][1]         = 1;
323     transforms[num_directions][2][2]         = 1;
324     transforms[num_directions][3][3]         = 1;
325     transforms[num_directions][direction][3] = upper[direction] - lower[direction];
326     num_directions++;
327   }
328 
329   PetscCall(DMPlexSetIsoperiodicFaceSF(dm, num_directions, face_sfs));
330   PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, num_directions, (PetscScalar *)transforms));
331 
332   for (PetscInt i = 0; i < num_directions; i++) PetscCall(PetscSFDestroy(&face_sfs[i]));
333   PetscFunctionReturn(PETSC_SUCCESS);
334 }
335 
336 // This is a DMGlobalToLocalHook that applies the affine offsets. When extended for rotated periodicity, it'll need to
337 // apply a rotatonal transform and similar operations will be needed for fields (e.g., to rotate a velocity vector).
338 // We use this crude approach here so we don't have to write new GPU kernels yet.
339 static PetscErrorCode DMCoordAddPeriodicOffsets_Private(DM dm, Vec g, InsertMode mode, Vec l, void *ctx)
340 {
341   PetscFunctionBegin;
342   // These `VecScatter`s should be merged to improve efficiency; the scatters cannot be overlapped.
343   for (PetscInt i = 0; i < dm->periodic.num_affines; i++) {
344     PetscCall(VecScatterBegin(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD));
345     PetscCall(VecScatterEnd(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD));
346   }
347   PetscFunctionReturn(PETSC_SUCCESS);
348 }
349 
350 // Modify index array based on the transformation of `point` for the given section and field
351 // Used for correcting the sfNatural based on point reorientation
352 static PetscErrorCode DMPlexOrientFieldPointIndex(DM dm, PetscSection section, PetscInt field, PetscInt array_size, PetscInt array[], PetscInt point, PetscInt orientation)
353 {
354   PetscInt        *copy;
355   PetscInt         dof, off, point_ornt[2] = {point, orientation};
356   const PetscInt **perms;
357 
358   PetscFunctionBeginUser;
359   PetscCall(PetscSectionGetDof(section, point, &dof));
360   PetscCall(PetscSectionGetOffset(section, point, &off));
361   PetscCheck(off + dof <= array_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Section indices exceed index array bounds");
362   PetscCall(DMGetWorkArray(dm, dof, MPIU_INT, &copy));
363   PetscArraycpy(copy, &array[off], dof);
364 
365   PetscCall(PetscSectionGetFieldPointSyms(section, field, 1, point_ornt, &perms, NULL));
366   for (PetscInt i = 0; i < dof; i++) {
367     if (perms[0]) array[off + perms[0][i]] = copy[i];
368   }
369 
370   PetscCall(PetscSectionRestoreFieldPointSyms(section, field, 1, point_ornt, &perms, NULL));
371   PetscCall(DMRestoreWorkArray(dm, dof, MPIU_INT, &copy));
372   PetscFunctionReturn(PETSC_SUCCESS);
373 }
374 
375 // Modify Vec based on the transformation of `point` for the given section and field
376 static PetscErrorCode DMPlexOrientFieldPointVec(DM dm, PetscSection section, PetscInt field, Vec V, PetscInt point, PetscInt orientation)
377 {
378   PetscScalar        *copy, *V_arr;
379   PetscInt            dof, off, point_ornt[2] = {point, orientation};
380   const PetscInt    **perms;
381   const PetscScalar **rots;
382 
383   PetscFunctionBeginUser;
384   PetscCall(PetscSectionGetDof(section, point, &dof));
385   PetscCall(PetscSectionGetOffset(section, point, &off));
386   PetscCall(VecGetArray(V, &V_arr));
387   PetscCall(DMGetWorkArray(dm, dof, MPIU_SCALAR, &copy));
388   PetscArraycpy(copy, &V_arr[off], dof);
389 
390   PetscCall(PetscSectionGetFieldPointSyms(section, field, 1, point_ornt, &perms, &rots));
391   for (PetscInt i = 0; i < dof; i++) {
392     if (perms[0]) V_arr[off + perms[0][i]] = copy[i];
393     if (rots[0]) V_arr[off + perms[0][i]] *= rots[0][i];
394   }
395 
396   PetscCall(PetscSectionRestoreFieldPointSyms(section, field, 1, point_ornt, &perms, &rots));
397   PetscCall(DMRestoreWorkArray(dm, dof, MPIU_SCALAR, &copy));
398   PetscCall(VecRestoreArray(V, &V_arr));
399   PetscFunctionReturn(PETSC_SUCCESS);
400 }
401 
402 // Reorient the point in the DMPlex while also applying necessary corrections to other structures (e.g. coordinates)
403 static PetscErrorCode DMPlexOrientPointWithCorrections(DM dm, PetscInt point, PetscInt ornt, PetscSection perm_section, PetscInt perm_length, PetscInt perm[])
404 {
405   // TODO: Potential speed up if we early exit for ornt == 0 (i.e. if ornt is identity, we don't need to do anything)
406   PetscFunctionBeginUser;
407   PetscCall(DMPlexOrientPoint(dm, point, ornt));
408 
409   { // Correct coordinates based on new cone ordering
410     DM           cdm;
411     PetscSection csection;
412     Vec          coordinates;
413     PetscInt     pStart, pEnd;
414 
415     PetscCall(DMGetCoordinatesLocal(dm, &coordinates));
416     PetscCall(DMGetCoordinateDM(dm, &cdm));
417     PetscCall(DMGetLocalSection(cdm, &csection));
418     PetscCall(PetscSectionGetChart(csection, &pStart, &pEnd));
419     if (IsPointInsideStratum(point, pStart, pEnd)) PetscCall(DMPlexOrientFieldPointVec(cdm, csection, 0, coordinates, point, ornt));
420   }
421 
422   if (perm_section) {
423     PetscInt num_fields;
424     PetscCall(PetscSectionGetNumFields(perm_section, &num_fields));
425     for (PetscInt f = 0; f < num_fields; f++) PetscCall(DMPlexOrientFieldPointIndex(dm, perm_section, f, perm_length, perm, point, ornt));
426   }
427   PetscFunctionReturn(PETSC_SUCCESS);
428 }
429 
430 // Creates SF to communicate data from donor to periodic faces. The data can be different sizes per donor-periodic pair and is given in `point_sizes[]`
431 static PetscErrorCode CreateDonorToPeriodicSF(DM dm, PetscSF face_sf, PetscInt pStart, PetscInt pEnd, const PetscInt point_sizes[], PetscInt *rootbuffersize, PetscInt *leafbuffersize, PetscBT *rootbt, PetscSF *sf_closure)
432 {
433   MPI_Comm           comm;
434   PetscMPIInt        rank;
435   PetscInt           nroots, nleaves;
436   PetscInt          *rootdata, *leafdata;
437   const PetscInt    *filocal;
438   const PetscSFNode *firemote;
439 
440   PetscFunctionBeginUser;
441   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
442   PetscCallMPI(MPI_Comm_rank(comm, &rank));
443 
444   PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, &firemote));
445   PetscCall(PetscCalloc2(2 * nroots, &rootdata, 2 * nroots, &leafdata));
446   for (PetscInt i = 0; i < nleaves; i++) {
447     PetscInt point = filocal[i];
448     PetscCheck(IsPointInsideStratum(point, pStart, pEnd), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Point %" PetscInt_FMT " in leaves exists outside of stratum [%" PetscInt_FMT ", %" PetscInt_FMT ")", point, pStart, pEnd);
449     leafdata[point] = point_sizes[point - pStart];
450   }
451   PetscCall(PetscSFReduceBegin(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
452   PetscCall(PetscSFReduceEnd(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
453 
454   PetscInt root_offset = 0;
455   PetscCall(PetscBTCreate(nroots, rootbt));
456   for (PetscInt p = 0; p < nroots; p++) {
457     const PetscInt *donor_dof = rootdata + nroots;
458     if (donor_dof[p] == 0) {
459       rootdata[2 * p]     = -1;
460       rootdata[2 * p + 1] = -1;
461       continue;
462     }
463     PetscCall(PetscBTSet(*rootbt, p));
464     PetscCheck(IsPointInsideStratum(p, pStart, pEnd), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Point %" PetscInt_FMT " in roots exists outside of stratum [%" PetscInt_FMT ", %" PetscInt_FMT ")", p, pStart, pEnd);
465     PetscInt p_size = point_sizes[p - pStart];
466     PetscCheck(donor_dof[p] == p_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Reduced leaf data size (%" PetscInt_FMT ") does not match root data size (%" PetscInt_FMT ")", donor_dof[p], p_size);
467     rootdata[2 * p]     = root_offset;
468     rootdata[2 * p + 1] = p_size;
469     root_offset += p_size;
470   }
471   PetscCall(PetscSFBcastBegin(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
472   PetscCall(PetscSFBcastEnd(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
473   // Count how many leaves we need to communicate the closures
474   PetscInt leaf_offset = 0;
475   for (PetscInt i = 0; i < nleaves; i++) {
476     PetscInt point = filocal[i];
477     if (leafdata[2 * point + 1] < 0) continue;
478     leaf_offset += leafdata[2 * point + 1];
479   }
480 
481   PetscSFNode *closure_leaf;
482   PetscCall(PetscMalloc1(leaf_offset, &closure_leaf));
483   leaf_offset = 0;
484   for (PetscInt i = 0; i < nleaves; i++) {
485     PetscInt point   = filocal[i];
486     PetscInt cl_size = leafdata[2 * point + 1];
487     if (cl_size < 0) continue;
488     for (PetscInt j = 0; j < cl_size; j++) {
489       closure_leaf[leaf_offset].rank  = firemote[i].rank;
490       closure_leaf[leaf_offset].index = leafdata[2 * point] + j;
491       leaf_offset++;
492     }
493   }
494 
495   PetscCall(PetscSFCreate(comm, sf_closure));
496   PetscCall(PetscSFSetGraph(*sf_closure, root_offset, leaf_offset, NULL, PETSC_USE_POINTER, closure_leaf, PETSC_OWN_POINTER));
497   *rootbuffersize = root_offset;
498   *leafbuffersize = leaf_offset;
499   PetscCall(PetscFree2(rootdata, leafdata));
500   PetscFunctionReturn(PETSC_SUCCESS);
501 }
502 
503 // Determine if `key` is in `array`. `array` does NOT need to be sorted
504 static inline PetscBool SearchIntArray(PetscInt key, PetscInt array_size, const PetscInt array[])
505 {
506   for (PetscInt i = 0; i < array_size; i++)
507     if (array[i] == key) return PETSC_TRUE;
508   return PETSC_FALSE;
509 }
510 
511 // Translate a cone in periodic points to the cone in donor points based on the `periodic2donor` array
512 static inline PetscErrorCode TranslateConeP2D(const PetscInt periodic_cone[], PetscInt cone_size, const PetscInt periodic2donor[], PetscInt p2d_count, PetscInt p2d_cone[])
513 {
514   PetscFunctionBeginUser;
515   for (PetscInt p = 0; p < cone_size; p++) {
516     PetscInt p2d_index = -1;
517     for (PetscInt p2d = 0; p2d < p2d_count; p2d++) {
518       if (periodic2donor[p2d * 2] == periodic_cone[p]) p2d_index = p2d;
519     }
520     PetscCheck(p2d_index >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find periodic point in periodic-to-donor array");
521     p2d_cone[p] = periodic2donor[2 * p2d_index + 1];
522   }
523   PetscFunctionReturn(PETSC_SUCCESS);
524 }
525 
526 // Corrects the cone order of periodic faces (and their transitive closure's cones) to match their donor face pair.
527 //
528 // This is done by:
529 // 1. Communicating the donor's vertex coordinates and recursive cones (i.e. its own cone and those of it's constituent edges) to it's periodic pairs
530 //    - The donor vertices have the isoperiodic transform applied to them such that they should match exactly
531 // 2. Translating the periodic vertices into the donor vertices point IDs
532 // 3. Translating the cone of each periodic point into the donor point IDs
533 // 4. Comparing the periodic-to-donor cone to the donor cone for each point
534 // 5. Apply the necessary transformation to the periodic cone to make it match the donor cone
535 static PetscErrorCode DMPlexCorrectOrientationForIsoperiodic(DM dm)
536 {
537   MPI_Comm        comm;
538   DM_Plex        *plex = (DM_Plex *)dm->data;
539   PetscInt        nroots, nleaves;
540   PetscInt       *local_vec_perm = NULL, local_vec_length = 0, *global_vec_perm = NULL, global_vec_length = 0;
541   const PetscInt *filocal, coords_field_id = 0;
542   DM              cdm;
543   PetscSection    csection, localSection = NULL;
544   PetscSF         sfNatural_old = NULL;
545   Vec             coordinates;
546   PetscMPIInt     myrank;
547   PetscBool       debug_printing = PETSC_FALSE;
548 
549   PetscFunctionBeginUser;
550   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
551   PetscCallMPI(MPI_Comm_rank(comm, &myrank));
552   PetscCall(DMGetCoordinatesLocal(dm, &coordinates));
553   PetscCheck(coordinates, comm, PETSC_ERR_ARG_WRONGSTATE, "DM must have coordinates to setup isoperiodic");
554   PetscCall(DMGetCoordinateDM(dm, &cdm));
555   PetscCall(DMGetLocalSection(cdm, &csection));
556 
557   PetscCall(DMGetNaturalSF(dm, &sfNatural_old));
558   // Prep data for naturalSF correction
559   if (plex->periodic.num_face_sfs > 0 && sfNatural_old) {
560     PetscSection       globalSection;
561     PetscSF            pointSF, sectionSF;
562     PetscInt           nleaves;
563     const PetscInt    *ilocal;
564     const PetscSFNode *iremote;
565 
566     // Create global section with just pointSF and including constraints
567     PetscCall(DMGetLocalSection(dm, &localSection));
568     PetscCall(DMGetPointSF(dm, &pointSF));
569     PetscCall(PetscSectionCreateGlobalSection(localSection, pointSF, PETSC_TRUE, PETSC_TRUE, PETSC_FALSE, &globalSection));
570 
571     // Set local_vec_perm to be negative values when that dof is not owned by the current rank
572     // Dofs that are owned are set to their corresponding global Vec index
573     PetscCall(PetscSectionGetStorageSize(globalSection, &global_vec_length));
574     PetscCall(PetscSectionGetStorageSize(localSection, &local_vec_length));
575     PetscCall(PetscMalloc2(global_vec_length, &global_vec_perm, local_vec_length, &local_vec_perm));
576     for (PetscInt i = 0; i < global_vec_length; i++) global_vec_perm[i] = i;
577     for (PetscInt i = 0; i < local_vec_length; i++) local_vec_perm[i] = -(i + 1);
578 
579     PetscCall(PetscSFCreate(comm, &sectionSF));
580     PetscCall(PetscSFSetGraphSection(sectionSF, localSection, globalSection));
581     PetscCall(PetscSFGetGraph(sectionSF, NULL, &nleaves, &ilocal, &iremote));
582     for (PetscInt l = 0; l < nleaves; l++) {
583       if (iremote[l].rank != myrank) continue;
584       PetscInt local_index        = ilocal ? ilocal[l] : l;
585       local_vec_perm[local_index] = global_vec_perm[iremote[l].index];
586     }
587     PetscCall(PetscSectionDestroy(&globalSection));
588     PetscCall(PetscSFDestroy(&sectionSF));
589   }
590 
591   // Create sf_vert_coords and sf_face_cones for communicating donor vertices and cones to periodic faces, respectively
592   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) {
593     PetscSF face_sf                  = plex->periodic.face_sfs[f];
594     const PetscScalar(*transform)[4] = (const PetscScalar(*)[4])plex->periodic.transform[f];
595     PetscInt *face_vertices_size, *face_cones_size;
596     PetscInt  fStart, fEnd, vStart, vEnd, rootnumvert, leafnumvert, rootconesize, leafconesize, dim;
597     PetscSF   sf_vert_coords, sf_face_cones;
598     PetscBT   rootbt;
599 
600     PetscCall(DMGetCoordinateDim(dm, &dim));
601     PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
602     PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
603     PetscCall(PetscCalloc2(fEnd - fStart, &face_vertices_size, fEnd - fStart, &face_cones_size));
604 
605     // Create SFs to communicate donor vertices and donor cones to periodic faces
606     for (PetscInt f = fStart, index = 0; f < fEnd; f++, index++) {
607       PetscInt cl_size, *closure = NULL, num_vertices = 0;
608       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &cl_size, &closure));
609       for (PetscInt p = 0; p < cl_size; p++) {
610         PetscInt cl_point = closure[2 * p];
611         if (IsPointInsideStratum(cl_point, vStart, vEnd)) num_vertices++;
612         else {
613           PetscInt cone_size;
614           PetscCall(DMPlexGetConeSize(dm, cl_point, &cone_size));
615           face_cones_size[index] += cone_size + 2;
616         }
617       }
618       face_vertices_size[index] = num_vertices;
619       face_cones_size[index] += num_vertices;
620       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &cl_size, &closure));
621     }
622     PetscCall(CreateDonorToPeriodicSF(dm, face_sf, fStart, fEnd, face_vertices_size, &rootnumvert, &leafnumvert, &rootbt, &sf_vert_coords));
623     PetscCall(PetscBTDestroy(&rootbt));
624     PetscCall(CreateDonorToPeriodicSF(dm, face_sf, fStart, fEnd, face_cones_size, &rootconesize, &leafconesize, &rootbt, &sf_face_cones));
625 
626     PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, NULL));
627 
628     PetscReal *leaf_donor_coords;
629     PetscInt  *leaf_donor_cones;
630 
631     { // Communicate donor coords and cones to the periodic faces
632       PetscReal         *mydonor_vertices;
633       PetscInt          *mydonor_cones;
634       const PetscScalar *coords_arr;
635 
636       PetscCall(PetscCalloc2(rootnumvert * dim, &mydonor_vertices, rootconesize, &mydonor_cones));
637       PetscCall(VecGetArrayRead(coordinates, &coords_arr));
638       for (PetscInt donor_face = 0, donor_vert_offset = 0, donor_cone_offset = 0; donor_face < nroots; donor_face++) {
639         if (!PetscBTLookup(rootbt, donor_face)) continue;
640         PetscInt cl_size, *closure = NULL;
641 
642         PetscCall(DMPlexGetTransitiveClosure(dm, donor_face, PETSC_TRUE, &cl_size, &closure));
643         // Pack vertex coordinates
644         for (PetscInt p = 0; p < cl_size; p++) {
645           PetscInt cl_point = closure[2 * p], dof, offset;
646           if (!IsPointInsideStratum(cl_point, vStart, vEnd)) continue;
647           mydonor_cones[donor_cone_offset++] = cl_point;
648           PetscCall(PetscSectionGetFieldDof(csection, cl_point, coords_field_id, &dof));
649           PetscCall(PetscSectionGetFieldOffset(csection, cl_point, coords_field_id, &offset));
650           PetscAssert(dof == dim, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Point %" PetscInt_FMT " has dof size %" PetscInt_FMT ", but should match dimension size %" PetscInt_FMT, cl_point, dof, dim);
651           // Apply isoperiodic transform to donor vertices such that corresponding periodic vertices should match exactly
652           for (PetscInt d = 0; d < dof; d++) mydonor_vertices[donor_vert_offset * dim + d] = PetscRealPart(coords_arr[offset + d]) + PetscRealPart(transform[d][3]);
653           donor_vert_offset++;
654         }
655         // Pack cones of face points (including face itself)
656         for (PetscInt p = 0; p < cl_size; p++) {
657           PetscInt        cl_point = closure[2 * p], cone_size, depth;
658           const PetscInt *cone;
659 
660           PetscCall(DMPlexGetConeSize(dm, cl_point, &cone_size));
661           PetscCall(DMPlexGetCone(dm, cl_point, &cone));
662           PetscCall(DMPlexGetPointDepth(dm, cl_point, &depth));
663           if (depth == 0) continue; // don't include vertex depth
664           mydonor_cones[donor_cone_offset++] = cone_size;
665           mydonor_cones[donor_cone_offset++] = cl_point;
666           PetscArraycpy(&mydonor_cones[donor_cone_offset], cone, cone_size);
667           donor_cone_offset += cone_size;
668         }
669         PetscCall(DMPlexRestoreTransitiveClosure(dm, donor_face, PETSC_TRUE, &cl_size, &closure));
670       }
671       PetscCall(VecRestoreArrayRead(coordinates, &coords_arr));
672       PetscCall(PetscBTDestroy(&rootbt));
673 
674       MPI_Datatype vertex_unit;
675       PetscMPIInt  n;
676       PetscCall(PetscMPIIntCast(dim, &n));
677       PetscCallMPI(MPI_Type_contiguous(n, MPIU_REAL, &vertex_unit));
678       PetscCallMPI(MPI_Type_commit(&vertex_unit));
679       PetscCall(PetscMalloc2(leafnumvert * 3, &leaf_donor_coords, leafconesize, &leaf_donor_cones));
680       PetscCall(PetscSFBcastBegin(sf_vert_coords, vertex_unit, mydonor_vertices, leaf_donor_coords, MPI_REPLACE));
681       PetscCall(PetscSFBcastBegin(sf_face_cones, MPIU_INT, mydonor_cones, leaf_donor_cones, MPI_REPLACE));
682       PetscCall(PetscSFBcastEnd(sf_vert_coords, vertex_unit, mydonor_vertices, leaf_donor_coords, MPI_REPLACE));
683       PetscCall(PetscSFBcastEnd(sf_face_cones, MPIU_INT, mydonor_cones, leaf_donor_cones, MPI_REPLACE));
684       PetscCall(PetscSFDestroy(&sf_vert_coords));
685       PetscCall(PetscSFDestroy(&sf_face_cones));
686       PetscCallMPI(MPI_Type_free(&vertex_unit));
687       PetscCall(PetscFree2(mydonor_vertices, mydonor_cones));
688     }
689 
690     { // Determine periodic orientation w/rt donor vertices and reorient
691       PetscReal tol = PetscSqr(PETSC_MACHINE_EPSILON * 1e3);
692       PetscInt *periodic2donor, dm_depth, maxConeSize;
693       PetscInt  coords_offset = 0, cones_offset = 0;
694 
695       PetscCall(DMPlexGetDepth(dm, &dm_depth));
696       PetscCall(DMPlexGetMaxSizes(dm, &maxConeSize, NULL));
697       PetscCall(DMGetWorkArray(dm, 2 * PetscPowInt(maxConeSize, dm_depth - 1), MPIU_INT, &periodic2donor));
698 
699       // Translate the periodic face vertices into the donor vertices
700       // Translation stored in periodic2donor
701       for (PetscInt i = 0; i < nleaves; i++) {
702         PetscInt  periodic_face = filocal[i], cl_size, num_verts = face_vertices_size[periodic_face - fStart];
703         PetscInt  cones_size = face_cones_size[periodic_face - fStart], p2d_count = 0;
704         PetscInt *closure = NULL;
705 
706         PetscCall(DMPlexGetTransitiveClosure(dm, periodic_face, PETSC_TRUE, &cl_size, &closure));
707         for (PetscInt p = 0; p < cl_size; p++) {
708           PetscInt     cl_point = closure[2 * p], coords_size, donor_vertex = -1;
709           PetscScalar *coords = NULL;
710 
711           if (!IsPointInsideStratum(cl_point, vStart, vEnd)) continue;
712           PetscCall(DMPlexVecGetClosure(dm, csection, coordinates, cl_point, &coords_size, &coords));
713           PetscAssert(coords_size == dim, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Point %" PetscInt_FMT " has dof size %" PetscInt_FMT ", but should match dimension size %" PetscInt_FMT, cl_point, coords_size, dim);
714 
715           for (PetscInt v = 0; v < num_verts; v++) {
716             PetscReal dist_sqr = 0;
717             for (PetscInt d = 0; d < coords_size; d++) dist_sqr += PetscSqr(PetscRealPart(coords[d]) - leaf_donor_coords[(v + coords_offset) * dim + d]);
718             if (dist_sqr < tol) {
719               donor_vertex = leaf_donor_cones[cones_offset + v];
720               break;
721             }
722           }
723           PetscCheck(donor_vertex >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Periodic face %" PetscInt_FMT " could not find matching donor vertex for vertex %" PetscInt_FMT, periodic_face, cl_point);
724           if (PetscDefined(USE_DEBUG)) {
725             for (PetscInt c = 0; c < p2d_count; c++) PetscCheck(periodic2donor[2 * c + 1] != donor_vertex, comm, PETSC_ERR_PLIB, "Found repeated cone_point in periodic_ordering");
726           }
727 
728           periodic2donor[2 * p2d_count + 0] = cl_point;
729           periodic2donor[2 * p2d_count + 1] = donor_vertex;
730           p2d_count++;
731           PetscCall(DMPlexVecRestoreClosure(dm, csection, coordinates, cl_point, &coords_size, &coords));
732         }
733         coords_offset += num_verts;
734         PetscCall(DMPlexRestoreTransitiveClosure(dm, periodic_face, PETSC_TRUE, &cl_size, &closure));
735 
736         { // Determine periodic orientation w/rt donor vertices and reorient
737           PetscInt      depth, *p2d_cone, face_is_array[1] = {periodic_face};
738           IS           *is_arrays, face_is;
739           PetscSection *section_arrays;
740           PetscInt     *donor_cone_array = &leaf_donor_cones[cones_offset + num_verts];
741 
742           PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 1, face_is_array, PETSC_USE_POINTER, &face_is));
743           PetscCall(DMPlexGetConeRecursive(dm, face_is, &depth, &is_arrays, &section_arrays));
744           PetscCall(ISDestroy(&face_is));
745           PetscCall(DMGetWorkArray(dm, maxConeSize, MPIU_INT, &p2d_cone));
746           for (PetscInt d = 0; d < depth - 1; d++) {
747             PetscInt        pStart, pEnd;
748             PetscSection    section = section_arrays[d];
749             const PetscInt *periodic_cone_arrays, *periodic_point_arrays;
750 
751             PetscCall(ISGetIndices(is_arrays[d], &periodic_cone_arrays));
752             PetscCall(ISGetIndices(is_arrays[d + 1], &periodic_point_arrays)); // Points at d+1 correspond to the cones at d
753             PetscCall(PetscSectionGetChart(section_arrays[d], &pStart, &pEnd));
754             for (PetscInt p = pStart; p < pEnd; p++) {
755               PetscInt periodic_cone_size, periodic_cone_offset, periodic_point = periodic_point_arrays[p];
756 
757               PetscCall(PetscSectionGetDof(section, p, &periodic_cone_size));
758               PetscCall(PetscSectionGetOffset(section, p, &periodic_cone_offset));
759               const PetscInt *periodic_cone = &periodic_cone_arrays[periodic_cone_offset];
760               PetscCall(TranslateConeP2D(periodic_cone, periodic_cone_size, periodic2donor, p2d_count, p2d_cone));
761 
762               // Find the donor cone that matches the periodic point's cone
763               PetscInt  donor_cone_offset = 0, donor_point = -1, *donor_cone = NULL;
764               PetscBool cone_matches = PETSC_FALSE;
765               while (donor_cone_offset < cones_size - num_verts) {
766                 PetscInt donor_cone_size = donor_cone_array[donor_cone_offset];
767                 donor_point              = donor_cone_array[donor_cone_offset + 1];
768                 donor_cone               = &donor_cone_array[donor_cone_offset + 2];
769 
770                 if (donor_cone_size != periodic_cone_size) goto next_cone;
771                 for (PetscInt c = 0; c < periodic_cone_size; c++) {
772                   cone_matches = SearchIntArray(donor_cone[c], periodic_cone_size, p2d_cone);
773                   if (!cone_matches) goto next_cone;
774                 }
775                 // Save the found donor cone's point to the translation array. These will be used for higher depth points.
776                 // i.e. we save the edge translations for when we look for face cones
777                 periodic2donor[2 * p2d_count + 0] = periodic_point;
778                 periodic2donor[2 * p2d_count + 1] = donor_point;
779                 p2d_count++;
780                 break;
781 
782               next_cone:
783                 donor_cone_offset += donor_cone_size + 2;
784               }
785               PetscCheck(donor_cone_offset < cones_size - num_verts, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find donor cone equivalent to cone of periodic point %" PetscInt_FMT, periodic_point);
786 
787               { // Compare the donor cone with the translated periodic cone and reorient
788                 PetscInt       ornt;
789                 DMPolytopeType cell_type;
790                 PetscBool      found;
791                 PetscCall(DMPlexGetCellType(dm, periodic_point, &cell_type));
792                 PetscCall(DMPolytopeMatchOrientation(cell_type, donor_cone, p2d_cone, &ornt, &found));
793                 PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Could not find transformation between donor (%" PetscInt_FMT ") and periodic (%" PetscInt_FMT ") cone's", periodic_point, donor_point);
794                 if (debug_printing) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Reorienting point %" PetscInt_FMT " by %" PetscInt_FMT "\n", periodic_point, ornt));
795                 PetscCall(DMPlexOrientPointWithCorrections(dm, periodic_point, ornt, localSection, local_vec_length, local_vec_perm));
796               }
797             }
798             PetscCall(ISRestoreIndices(is_arrays[d], &periodic_cone_arrays));
799             PetscCall(ISRestoreIndices(is_arrays[d + 1], &periodic_point_arrays));
800           }
801           PetscCall(DMRestoreWorkArray(dm, maxConeSize, MPIU_INT, &p2d_cone));
802           PetscCall(DMPlexRestoreConeRecursive(dm, face_is, &depth, &is_arrays, &section_arrays));
803         }
804 
805         PetscCall(DMPlexRestoreTransitiveClosure(dm, periodic_face, PETSC_TRUE, &cl_size, &closure));
806         cones_offset += cones_size;
807       }
808       PetscCall(DMRestoreWorkArray(dm, 2 * PetscPowInt(maxConeSize, dm_depth - 1), MPIU_INT, &periodic2donor));
809     }
810     // Re-set local coordinates (i.e. destroy global coordinates if they were modified)
811     PetscCall(DMSetCoordinatesLocal(dm, coordinates));
812 
813     PetscCall(PetscFree2(leaf_donor_coords, leaf_donor_cones));
814     PetscCall(PetscFree2(face_vertices_size, face_cones_size));
815   }
816 
817   if (sfNatural_old) { // Correct SFNatural based on the permutation of the local vector
818     PetscSF      newglob_to_oldglob_sf, sfNatural_old, sfNatural_new;
819     PetscSFNode *remote;
820 
821     { // Translate permutation of local Vec into permutation of global Vec
822       PetscInt g = 0;
823       PetscBT  global_vec_check; // Verify that global indices aren't doubled
824       PetscCall(PetscBTCreate(global_vec_length, &global_vec_check));
825       for (PetscInt l = 0; l < local_vec_length; l++) {
826         PetscInt global_index = local_vec_perm[l];
827         if (global_index < 0) continue;
828         PetscCheck(!PetscBTLookupSet(global_vec_check, global_index), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found duplicate global index %" PetscInt_FMT " in local_vec_perm", global_index);
829         global_vec_perm[g++] = global_index;
830       }
831       PetscCheck(g == global_vec_length, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Wrong number of non-negative local indices");
832       PetscCall(PetscBTDestroy(&global_vec_check));
833     }
834 
835     PetscCall(PetscMalloc1(global_vec_length, &remote));
836     for (PetscInt i = 0; i < global_vec_length; i++) {
837       remote[i].rank  = myrank;
838       remote[i].index = global_vec_perm[i];
839     }
840     PetscCall(PetscFree2(global_vec_perm, local_vec_perm));
841     PetscCall(PetscSFCreate(comm, &newglob_to_oldglob_sf));
842     PetscCall(PetscSFSetGraph(newglob_to_oldglob_sf, global_vec_length, global_vec_length, NULL, PETSC_USE_POINTER, remote, PETSC_OWN_POINTER));
843     PetscCall(DMGetNaturalSF(dm, &sfNatural_old));
844     PetscCall(PetscSFCompose(newglob_to_oldglob_sf, sfNatural_old, &sfNatural_new));
845     PetscCall(DMSetNaturalSF(dm, sfNatural_new));
846     PetscCall(PetscSFDestroy(&sfNatural_new));
847     PetscCall(PetscSFDestroy(&newglob_to_oldglob_sf));
848   }
849   PetscFunctionReturn(PETSC_SUCCESS);
850 }
851 
852 // Start with an SF for a positive depth (e.g., faces) and create a new SF for matched closure.
853 //
854 // Output Arguments:
855 //
856 // + closure_sf - augmented point SF (see `DMGetPointSF()`) that includes the faces and all points in its closure. This
857 //   can be used to create a global section and section SF.
858 // - is_points - array of index sets for just the points in the closure of `face_sf`. These may be used to apply an affine
859 //   transformation to periodic dofs; see DMPeriodicCoordinateSetUp_Internal().
860 //
861 static PetscErrorCode DMPlexCreateIsoperiodicPointSF_Private(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs, PetscSF *closure_sf, IS **is_points)
862 {
863   MPI_Comm           comm;
864   PetscMPIInt        rank;
865   PetscSF            point_sf;
866   PetscInt           nroots, nleaves;
867   const PetscInt    *filocal;
868   const PetscSFNode *firemote;
869 
870   PetscFunctionBegin;
871   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
872   PetscCallMPI(MPI_Comm_rank(comm, &rank));
873   PetscCall(DMGetPointSF(dm, &point_sf)); // Point SF has remote points
874   PetscCall(PetscMalloc1(num_face_sfs, is_points));
875 
876   PetscCall(DMPlexCorrectOrientationForIsoperiodic(dm));
877 
878   for (PetscInt f = 0; f < num_face_sfs; f++) {
879     PetscSF   face_sf = face_sfs[f];
880     PetscInt *cl_sizes;
881     PetscInt  fStart, fEnd, rootbuffersize, leafbuffersize;
882     PetscSF   sf_closure;
883     PetscBT   rootbt;
884 
885     PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
886     PetscCall(PetscMalloc1(fEnd - fStart, &cl_sizes));
887     for (PetscInt f = fStart, index = 0; f < fEnd; f++, index++) {
888       PetscInt cl_size, *closure = NULL;
889       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &cl_size, &closure));
890       cl_sizes[index] = cl_size - 1;
891       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &cl_size, &closure));
892     }
893 
894     PetscCall(CreateDonorToPeriodicSF(dm, face_sf, fStart, fEnd, cl_sizes, &rootbuffersize, &leafbuffersize, &rootbt, &sf_closure));
895     PetscCall(PetscFree(cl_sizes));
896     PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, &firemote));
897 
898     PetscSFNode *leaf_donor_closure;
899     { // Pack root buffer with owner for every point in the root cones
900       PetscSFNode       *donor_closure;
901       const PetscInt    *pilocal;
902       const PetscSFNode *piremote;
903       PetscInt           npoints;
904 
905       PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, &pilocal, &piremote));
906       PetscCall(PetscCalloc1(rootbuffersize, &donor_closure));
907       for (PetscInt p = 0, root_offset = 0; p < nroots; p++) {
908         if (!PetscBTLookup(rootbt, p)) continue;
909         PetscInt cl_size, *closure = NULL;
910         PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
911         for (PetscInt j = 1; j < cl_size; j++) {
912           PetscInt c = closure[2 * j];
913           if (pilocal) {
914             PetscInt found = -1;
915             if (npoints > 0) PetscCall(PetscFindInt(c, npoints, pilocal, &found));
916             if (found >= 0) {
917               donor_closure[root_offset++] = piremote[found];
918               continue;
919             }
920           }
921           // we own c
922           donor_closure[root_offset].rank  = rank;
923           donor_closure[root_offset].index = c;
924           root_offset++;
925         }
926         PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
927       }
928 
929       PetscCall(PetscMalloc1(leafbuffersize, &leaf_donor_closure));
930       PetscCall(PetscSFBcastBegin(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE));
931       PetscCall(PetscSFBcastEnd(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE));
932       PetscCall(PetscSFDestroy(&sf_closure));
933       PetscCall(PetscFree(donor_closure));
934     }
935 
936     PetscSFNode *new_iremote;
937     PetscCall(PetscCalloc1(nroots, &new_iremote));
938     for (PetscInt i = 0; i < nroots; i++) new_iremote[i].rank = -1;
939     // Walk leaves and match vertices
940     for (PetscInt i = 0, leaf_offset = 0; i < nleaves; i++) {
941       PetscInt  point   = filocal[i], cl_size;
942       PetscInt *closure = NULL;
943       PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
944       for (PetscInt j = 1; j < cl_size; j++) {
945         PetscInt    c  = closure[2 * j];
946         PetscSFNode lc = leaf_donor_closure[leaf_offset];
947         // printf("[%d] face %d.%d: %d ?-- (%d,%d)\n", rank, point, j, c, lc.rank, lc.index);
948         if (new_iremote[c].rank == -1) {
949           new_iremote[c] = lc;
950         } else PetscCheck(new_iremote[c].rank == lc.rank && new_iremote[c].index == lc.index, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Mismatched cone ordering between faces");
951         leaf_offset++;
952       }
953       PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
954     }
955     PetscCall(PetscFree(leaf_donor_closure));
956 
957     // Include face points in closure SF
958     for (PetscInt i = 0; i < nleaves; i++) new_iremote[filocal[i]] = firemote[i];
959     // consolidate leaves
960     PetscInt *leafdata;
961     PetscCall(PetscMalloc1(nroots, &leafdata));
962     PetscInt num_new_leaves = 0;
963     for (PetscInt i = 0; i < nroots; i++) {
964       if (new_iremote[i].rank == -1) continue;
965       new_iremote[num_new_leaves] = new_iremote[i];
966       leafdata[num_new_leaves]    = i;
967       num_new_leaves++;
968     }
969     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, num_new_leaves, leafdata, PETSC_COPY_VALUES, &(*is_points)[f]));
970 
971     PetscSF csf;
972     PetscCall(PetscSFCreate(comm, &csf));
973     PetscCall(PetscSFSetGraph(csf, nroots, num_new_leaves, leafdata, PETSC_COPY_VALUES, new_iremote, PETSC_COPY_VALUES));
974     PetscCall(PetscFree(new_iremote)); // copy and delete because new_iremote is longer than it needs to be
975     PetscCall(PetscFree(leafdata));
976     PetscCall(PetscBTDestroy(&rootbt));
977 
978     PetscInt npoints;
979     PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, NULL, NULL));
980     if (npoints < 0) { // empty point_sf
981       *closure_sf = csf;
982     } else {
983       PetscCall(PetscSFMerge(point_sf, csf, closure_sf));
984       PetscCall(PetscSFDestroy(&csf));
985     }
986     if (f > 0) PetscCall(PetscSFDestroy(&point_sf)); // Only destroy if point_sf is from previous calls to PetscSFMerge
987     point_sf = *closure_sf;                          // Use combined point + isoperiodic SF to define point ownership for further face_sf
988   }
989   PetscCall(PetscObjectSetName((PetscObject)*closure_sf, "Composed Periodic Points"));
990   PetscFunctionReturn(PETSC_SUCCESS);
991 }
992 
993 static PetscErrorCode DMGetIsoperiodicPointSF_Plex(DM dm, PetscSF *sf)
994 {
995   DM_Plex *plex = (DM_Plex *)dm->data;
996 
997   PetscFunctionBegin;
998   if (!plex->periodic.composed_sf) PetscCall(DMPlexCreateIsoperiodicPointSF_Private(dm, plex->periodic.num_face_sfs, plex->periodic.face_sfs, &plex->periodic.composed_sf, &plex->periodic.periodic_points));
999   if (sf) *sf = plex->periodic.composed_sf;
1000   PetscFunctionReturn(PETSC_SUCCESS);
1001 }
1002 
1003 PetscErrorCode DMPlexMigrateIsoperiodicFaceSF_Internal(DM old_dm, DM dm, PetscSF sf_migration)
1004 {
1005   DM_Plex    *plex = (DM_Plex *)old_dm->data;
1006   PetscSF     sf_point, *new_face_sfs;
1007   PetscMPIInt rank;
1008 
1009   PetscFunctionBegin;
1010   if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
1011   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
1012   PetscCall(DMGetPointSF(dm, &sf_point));
1013   PetscCall(PetscMalloc1(plex->periodic.num_face_sfs, &new_face_sfs));
1014 
1015   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) {
1016     PetscInt           old_npoints, new_npoints, old_nleaf, new_nleaf, point_nleaf;
1017     PetscSFNode       *new_leafdata, *rootdata, *leafdata;
1018     const PetscInt    *old_local, *point_local;
1019     const PetscSFNode *old_remote, *point_remote;
1020 
1021     PetscCall(PetscSFGetGraph(plex->periodic.face_sfs[f], &old_npoints, &old_nleaf, &old_local, &old_remote));
1022     PetscCall(PetscSFGetGraph(sf_migration, NULL, &new_nleaf, NULL, NULL));
1023     PetscCall(PetscSFGetGraph(sf_point, &new_npoints, &point_nleaf, &point_local, &point_remote));
1024     PetscAssert(new_nleaf == new_npoints, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected migration leaf space to match new point root space");
1025     PetscCall(PetscMalloc3(old_npoints, &rootdata, old_npoints, &leafdata, new_npoints, &new_leafdata));
1026 
1027     // Fill new_leafdata with new owners of all points
1028     for (PetscInt i = 0; i < new_npoints; i++) {
1029       new_leafdata[i].rank  = rank;
1030       new_leafdata[i].index = i;
1031     }
1032     for (PetscInt i = 0; i < point_nleaf; i++) {
1033       PetscInt j      = point_local[i];
1034       new_leafdata[j] = point_remote[i];
1035     }
1036     // REPLACE is okay because every leaf agrees about the new owners
1037     PetscCall(PetscSFReduceBegin(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE));
1038     PetscCall(PetscSFReduceEnd(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE));
1039     // rootdata now contains the new owners
1040 
1041     // Send to leaves of old space
1042     for (PetscInt i = 0; i < old_npoints; i++) {
1043       leafdata[i].rank  = -1;
1044       leafdata[i].index = -1;
1045     }
1046     PetscCall(PetscSFBcastBegin(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE));
1047     PetscCall(PetscSFBcastEnd(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE));
1048 
1049     // Send to new leaf space
1050     PetscCall(PetscSFBcastBegin(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE));
1051     PetscCall(PetscSFBcastEnd(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE));
1052 
1053     PetscInt     nface = 0, *new_local;
1054     PetscSFNode *new_remote;
1055     for (PetscInt i = 0; i < new_npoints; i++) nface += (new_leafdata[i].rank >= 0);
1056     PetscCall(PetscMalloc1(nface, &new_local));
1057     PetscCall(PetscMalloc1(nface, &new_remote));
1058     nface = 0;
1059     for (PetscInt i = 0; i < new_npoints; i++) {
1060       if (new_leafdata[i].rank == -1) continue;
1061       new_local[nface]  = i;
1062       new_remote[nface] = new_leafdata[i];
1063       nface++;
1064     }
1065     PetscCall(PetscFree3(rootdata, leafdata, new_leafdata));
1066     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &new_face_sfs[f]));
1067     PetscCall(PetscSFSetGraph(new_face_sfs[f], new_npoints, nface, new_local, PETSC_OWN_POINTER, new_remote, PETSC_OWN_POINTER));
1068     {
1069       char new_face_sf_name[PETSC_MAX_PATH_LEN];
1070       PetscCall(PetscSNPrintf(new_face_sf_name, sizeof new_face_sf_name, "Migrated Isoperiodic Faces #%" PetscInt_FMT, f));
1071       PetscCall(PetscObjectSetName((PetscObject)new_face_sfs[f], new_face_sf_name));
1072     }
1073   }
1074 
1075   PetscCall(DMPlexSetIsoperiodicFaceSF(dm, plex->periodic.num_face_sfs, new_face_sfs));
1076   PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, plex->periodic.num_face_sfs, (PetscScalar *)plex->periodic.transform));
1077   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) PetscCall(PetscSFDestroy(&new_face_sfs[f]));
1078   PetscCall(PetscFree(new_face_sfs));
1079   PetscFunctionReturn(PETSC_SUCCESS);
1080 }
1081 
1082 PetscErrorCode DMPeriodicCoordinateSetUp_Internal(DM dm)
1083 {
1084   DM_Plex   *plex = (DM_Plex *)dm->data;
1085   PetscCount count;
1086   IS         isdof;
1087   PetscInt   dim;
1088 
1089   PetscFunctionBegin;
1090   if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
1091   PetscCheck(plex->periodic.periodic_points, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Isoperiodic PointSF must be created before this function is called");
1092 
1093   PetscCall(DMGetCoordinateDim(dm, &dim));
1094   dm->periodic.num_affines = plex->periodic.num_face_sfs;
1095   PetscCall(PetscFree2(dm->periodic.affine_to_local, dm->periodic.affine));
1096   PetscCall(PetscMalloc2(dm->periodic.num_affines, &dm->periodic.affine_to_local, dm->periodic.num_affines, &dm->periodic.affine));
1097 
1098   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) {
1099     PetscInt        npoints, vsize, isize;
1100     const PetscInt *points;
1101     IS              is = plex->periodic.periodic_points[f];
1102     PetscSegBuffer  seg;
1103     PetscSection    section;
1104     PetscInt       *ind;
1105     Vec             L, P;
1106     VecType         vec_type;
1107     VecScatter      scatter;
1108     PetscScalar    *x;
1109 
1110     PetscCall(DMGetLocalSection(dm, &section));
1111     PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 32, &seg));
1112     PetscCall(ISGetSize(is, &npoints));
1113     PetscCall(ISGetIndices(is, &points));
1114     for (PetscInt i = 0; i < npoints; i++) {
1115       PetscInt point = points[i], off, dof;
1116 
1117       PetscCall(PetscSectionGetOffset(section, point, &off));
1118       PetscCall(PetscSectionGetDof(section, point, &dof));
1119       PetscAssert(dof % dim == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected dof %" PetscInt_FMT " not divisible by dimension %" PetscInt_FMT, dof, dim);
1120       for (PetscInt j = 0; j < dof / dim; j++) {
1121         PetscInt *slot;
1122 
1123         PetscCall(PetscSegBufferGetInts(seg, 1, &slot));
1124         *slot = off / dim + j;
1125       }
1126     }
1127     PetscCall(PetscSegBufferGetSize(seg, &count));
1128     PetscCall(PetscSegBufferExtractAlloc(seg, &ind));
1129     PetscCall(PetscSegBufferDestroy(&seg));
1130     PetscCall(PetscIntCast(count, &isize));
1131     PetscCall(ISCreateBlock(PETSC_COMM_SELF, dim, isize, ind, PETSC_OWN_POINTER, &isdof));
1132 
1133     PetscCall(PetscIntCast(count * dim, &vsize));
1134     PetscCall(DMGetLocalVector(dm, &L));
1135     PetscCall(VecCreate(PETSC_COMM_SELF, &P));
1136     PetscCall(VecSetSizes(P, vsize, vsize));
1137     PetscCall(VecGetType(L, &vec_type));
1138     PetscCall(VecSetType(P, vec_type));
1139     PetscCall(VecScatterCreate(P, NULL, L, isdof, &scatter));
1140     PetscCall(DMRestoreLocalVector(dm, &L));
1141     PetscCall(ISDestroy(&isdof));
1142 
1143     PetscCall(VecGetArrayWrite(P, &x));
1144     for (PetscCount i = 0; i < count; i++) {
1145       for (PetscInt j = 0; j < dim; j++) x[i * dim + j] = plex->periodic.transform[f][j][3];
1146     }
1147     PetscCall(VecRestoreArrayWrite(P, &x));
1148 
1149     dm->periodic.affine_to_local[f] = scatter;
1150     dm->periodic.affine[f]          = P;
1151   }
1152   PetscCall(DMGlobalToLocalHookAdd(dm, NULL, DMCoordAddPeriodicOffsets_Private, NULL));
1153   PetscFunctionReturn(PETSC_SUCCESS);
1154 }
1155 
1156 PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Internal(DM dm, PetscInt dim, const PetscInt faces[], const PetscReal lower[], const PetscReal upper[], const DMBoundaryType periodicity[], PetscBool interpolate)
1157 {
1158   PetscInt  eextent[3] = {1, 1, 1}, vextent[3] = {1, 1, 1};
1159   const Ijk closure_1[] = {
1160     {0, 0, 0},
1161     {1, 0, 0},
1162   };
1163   const Ijk closure_2[] = {
1164     {0, 0, 0},
1165     {1, 0, 0},
1166     {1, 1, 0},
1167     {0, 1, 0},
1168   };
1169   const Ijk closure_3[] = {
1170     {0, 0, 0},
1171     {0, 1, 0},
1172     {1, 1, 0},
1173     {1, 0, 0},
1174     {0, 0, 1},
1175     {1, 0, 1},
1176     {1, 1, 1},
1177     {0, 1, 1},
1178   };
1179   const Ijk *const closure_dim[] = {NULL, closure_1, closure_2, closure_3};
1180   // This must be kept consistent with DMPlexCreateCubeMesh_Internal
1181   const PetscInt        face_marker_1[]   = {1, 2};
1182   const PetscInt        face_marker_2[]   = {4, 2, 1, 3};
1183   const PetscInt        face_marker_3[]   = {6, 5, 3, 4, 1, 2};
1184   const PetscInt *const face_marker_dim[] = {NULL, face_marker_1, face_marker_2, face_marker_3};
1185   // Orient faces so the normal is in the positive axis and the first vertex is the one closest to zero.
1186   // These orientations can be determined by examining cones of a reference quad and hex element.
1187   const PetscInt        face_orient_1[]   = {0, 0};
1188   const PetscInt        face_orient_2[]   = {-1, 0, 0, -1};
1189   const PetscInt        face_orient_3[]   = {-2, 0, -2, 1, -2, 0};
1190   const PetscInt *const face_orient_dim[] = {NULL, face_orient_1, face_orient_2, face_orient_3};
1191 
1192   PetscFunctionBegin;
1193   PetscCall(PetscLogEventBegin(DMPLEX_CreateBoxSFC, dm, 0, 0, 0));
1194   PetscAssertPointer(dm, 1);
1195   PetscValidLogicalCollectiveInt(dm, dim, 2);
1196   PetscCall(DMSetDimension(dm, dim));
1197   PetscMPIInt rank, size;
1198   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size));
1199   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
1200   for (PetscInt i = 0; i < dim; i++) {
1201     eextent[i] = faces[i];
1202     vextent[i] = faces[i] + 1;
1203   }
1204   ZLayout layout;
1205   PetscCall(ZLayoutCreate(size, eextent, vextent, &layout));
1206   PetscZSet vset; // set of all vertices in the closure of the owned elements
1207   PetscCall(PetscZSetCreate(&vset));
1208   PetscInt local_elems = 0;
1209   for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
1210     Ijk loc = ZCodeSplit(z);
1211     if (IjkActive(layout.vextent, loc)) PetscCall(PetscZSetAdd(vset, z));
1212     else {
1213       z += ZStepOct(z);
1214       continue;
1215     }
1216     if (IjkActive(layout.eextent, loc)) {
1217       local_elems++;
1218       // Add all neighboring vertices to set
1219       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
1220         Ijk   inc  = closure_dim[dim][n];
1221         Ijk   nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
1222         ZCode v    = ZEncode(nloc);
1223         PetscCall(PetscZSetAdd(vset, v));
1224       }
1225     }
1226   }
1227   PetscInt local_verts, off = 0;
1228   ZCode   *vert_z;
1229   PetscCall(PetscZSetGetSize(vset, &local_verts));
1230   PetscCall(PetscMalloc1(local_verts, &vert_z));
1231   PetscCall(PetscZSetGetElems(vset, &off, vert_z));
1232   PetscCall(PetscZSetDestroy(&vset));
1233   // ZCode is unsigned for bitwise convenience, but highest bit should never be set, so can interpret as signed
1234   PetscCall(PetscSortInt64(local_verts, (PetscInt64 *)vert_z));
1235 
1236   PetscCall(DMPlexSetChart(dm, 0, local_elems + local_verts));
1237   for (PetscInt e = 0; e < local_elems; e++) PetscCall(DMPlexSetConeSize(dm, e, PetscPowInt(2, dim)));
1238   PetscCall(DMSetUp(dm));
1239   {
1240     PetscInt e = 0;
1241     for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
1242       Ijk loc = ZCodeSplit(z);
1243       if (!IjkActive(layout.eextent, loc)) {
1244         z += ZStepOct(z);
1245         continue;
1246       }
1247       PetscInt cone[8], orient[8] = {0};
1248       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
1249         Ijk      inc  = closure_dim[dim][n];
1250         Ijk      nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
1251         ZCode    v    = ZEncode(nloc);
1252         PetscInt ci   = ZCodeFind(v, local_verts, vert_z);
1253         PetscAssert(ci >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Did not find neighbor vertex in set");
1254         cone[n] = local_elems + ci;
1255       }
1256       PetscCall(DMPlexSetCone(dm, e, cone));
1257       PetscCall(DMPlexSetConeOrientation(dm, e, orient));
1258       e++;
1259     }
1260   }
1261 
1262   PetscCall(DMPlexSymmetrize(dm));
1263   PetscCall(DMPlexStratify(dm));
1264 
1265   { // Create point SF
1266     PetscSF sf;
1267     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &sf));
1268     PetscInt owned_verts = ZCodeFind(layout.zstarts[rank + 1], local_verts, vert_z);
1269     if (owned_verts < 0) owned_verts = -(owned_verts + 1); // We don't care whether the key was found
1270     PetscInt     num_ghosts = local_verts - owned_verts;   // Due to sorting, owned vertices always come first
1271     PetscInt    *local_ghosts;
1272     PetscSFNode *ghosts;
1273     PetscCall(PetscMalloc1(num_ghosts, &local_ghosts));
1274     PetscCall(PetscMalloc1(num_ghosts, &ghosts));
1275     for (PetscInt i = 0; i < num_ghosts;) {
1276       ZCode       z = vert_z[owned_verts + i];
1277       PetscMPIInt remote_rank, remote_count = 0;
1278 
1279       PetscCall(PetscMPIIntCast(ZCodeFind(z, size + 1, layout.zstarts), &remote_rank));
1280       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
1281       // We have a new remote rank; find all the ghost indices (which are contiguous in vert_z)
1282 
1283       // Count the elements on remote_rank
1284       PetscInt remote_elem = ZLayoutElementsOnRank(&layout, remote_rank);
1285 
1286       // Traverse vertices and make ghost links
1287       for (ZCode rz = layout.zstarts[remote_rank]; rz < layout.zstarts[remote_rank + 1]; rz++) {
1288         Ijk loc = ZCodeSplit(rz);
1289         if (rz == z) {
1290           local_ghosts[i] = local_elems + owned_verts + i;
1291           ghosts[i].rank  = remote_rank;
1292           ghosts[i].index = remote_elem + remote_count;
1293           i++;
1294           if (i == num_ghosts) break;
1295           z = vert_z[owned_verts + i];
1296         }
1297         if (IjkActive(layout.vextent, loc)) remote_count++;
1298         else rz += ZStepOct(rz);
1299       }
1300     }
1301     PetscCall(PetscSFSetGraph(sf, local_elems + local_verts, num_ghosts, local_ghosts, PETSC_OWN_POINTER, ghosts, PETSC_OWN_POINTER));
1302     PetscCall(PetscObjectSetName((PetscObject)sf, "SFC Point SF"));
1303     PetscCall(DMSetPointSF(dm, sf));
1304     PetscCall(PetscSFDestroy(&sf));
1305   }
1306   {
1307     Vec          coordinates;
1308     PetscScalar *coords;
1309     PetscSection coord_section;
1310     PetscInt     coord_size;
1311     PetscCall(DMGetCoordinateSection(dm, &coord_section));
1312     PetscCall(PetscSectionSetNumFields(coord_section, 1));
1313     PetscCall(PetscSectionSetFieldComponents(coord_section, 0, dim));
1314     PetscCall(PetscSectionSetChart(coord_section, local_elems, local_elems + local_verts));
1315     for (PetscInt v = 0; v < local_verts; v++) {
1316       PetscInt point = local_elems + v;
1317       PetscCall(PetscSectionSetDof(coord_section, point, dim));
1318       PetscCall(PetscSectionSetFieldDof(coord_section, point, 0, dim));
1319     }
1320     PetscCall(PetscSectionSetUp(coord_section));
1321     PetscCall(PetscSectionGetStorageSize(coord_section, &coord_size));
1322     PetscCall(VecCreate(PETSC_COMM_SELF, &coordinates));
1323     PetscCall(PetscObjectSetName((PetscObject)coordinates, "coordinates"));
1324     PetscCall(VecSetSizes(coordinates, coord_size, PETSC_DETERMINE));
1325     PetscCall(VecSetBlockSize(coordinates, dim));
1326     PetscCall(VecSetType(coordinates, VECSTANDARD));
1327     PetscCall(VecGetArray(coordinates, &coords));
1328     for (PetscInt v = 0; v < local_verts; v++) {
1329       Ijk loc             = ZCodeSplit(vert_z[v]);
1330       coords[v * dim + 0] = lower[0] + loc.i * (upper[0] - lower[0]) / layout.eextent.i;
1331       if (dim > 1) coords[v * dim + 1] = lower[1] + loc.j * (upper[1] - lower[1]) / layout.eextent.j;
1332       if (dim > 2) coords[v * dim + 2] = lower[2] + loc.k * (upper[2] - lower[2]) / layout.eextent.k;
1333     }
1334     PetscCall(VecRestoreArray(coordinates, &coords));
1335     PetscCall(DMSetCoordinatesLocal(dm, coordinates));
1336     PetscCall(VecDestroy(&coordinates));
1337   }
1338   if (interpolate) {
1339     PetscCall(DMPlexInterpolateInPlace_Internal(dm));
1340 
1341     DMLabel label;
1342     PetscCall(DMCreateLabel(dm, "Face Sets"));
1343     PetscCall(DMGetLabel(dm, "Face Sets", &label));
1344     PetscSegBuffer per_faces[3], donor_face_closure[3], my_donor_faces[3];
1345     for (PetscInt i = 0; i < 3; i++) {
1346       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &per_faces[i]));
1347       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &my_donor_faces[i]));
1348       PetscCall(PetscSegBufferCreate(sizeof(ZCode), 64 * PetscPowInt(2, dim), &donor_face_closure[i]));
1349     }
1350     PetscInt fStart, fEnd, vStart, vEnd;
1351     PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
1352     PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
1353     for (PetscInt f = fStart; f < fEnd; f++) {
1354       PetscInt npoints, *points = NULL, num_fverts = 0, fverts[8];
1355       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
1356       PetscInt bc_count[6] = {0};
1357       for (PetscInt i = 0; i < npoints; i++) {
1358         PetscInt p = points[2 * i];
1359         if (!IsPointInsideStratum(p, vStart, vEnd)) continue;
1360         fverts[num_fverts++] = p;
1361         Ijk loc              = ZCodeSplit(vert_z[p - vStart]);
1362         // Convention here matches DMPlexCreateCubeMesh_Internal
1363         bc_count[0] += loc.i == 0;
1364         bc_count[1] += loc.i == layout.vextent.i - 1;
1365         bc_count[2] += loc.j == 0;
1366         bc_count[3] += loc.j == layout.vextent.j - 1;
1367         bc_count[4] += loc.k == 0;
1368         bc_count[5] += loc.k == layout.vextent.k - 1;
1369       }
1370       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
1371       for (PetscInt bc = 0, bc_match = 0; bc < 2 * dim; bc++) {
1372         if (bc_count[bc] == PetscPowInt(2, dim - 1)) {
1373           PetscCall(DMPlexOrientPoint(dm, f, face_orient_dim[dim][bc]));
1374           if (periodicity[bc / 2] == DM_BOUNDARY_PERIODIC) {
1375             PetscInt *put;
1376             if (bc % 2 == 0) { // donor face; no label
1377               PetscCall(PetscSegBufferGet(my_donor_faces[bc / 2], 1, &put));
1378               *put = f;
1379             } else { // periodic face
1380               PetscCall(PetscSegBufferGet(per_faces[bc / 2], 1, &put));
1381               *put = f;
1382               ZCode *zput;
1383               PetscCall(PetscSegBufferGet(donor_face_closure[bc / 2], num_fverts, &zput));
1384               for (PetscInt i = 0; i < num_fverts; i++) {
1385                 Ijk loc = ZCodeSplit(vert_z[fverts[i] - vStart]);
1386                 switch (bc / 2) {
1387                 case 0:
1388                   loc.i = 0;
1389                   break;
1390                 case 1:
1391                   loc.j = 0;
1392                   break;
1393                 case 2:
1394                   loc.k = 0;
1395                   break;
1396                 }
1397                 *zput++ = ZEncode(loc);
1398               }
1399             }
1400             continue;
1401           }
1402           PetscAssert(bc_match == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Face matches multiple face sets");
1403           PetscCall(DMLabelSetValue(label, f, face_marker_dim[dim][bc]));
1404           bc_match++;
1405         }
1406       }
1407     }
1408     // Ensure that the Coordinate DM has our new boundary labels
1409     DM cdm;
1410     PetscCall(DMGetCoordinateDM(dm, &cdm));
1411     PetscCall(DMCopyLabels(dm, cdm, PETSC_COPY_VALUES, PETSC_FALSE, DM_COPY_LABELS_FAIL));
1412     if (periodicity[0] == DM_BOUNDARY_PERIODIC || (dim > 1 && periodicity[1] == DM_BOUNDARY_PERIODIC) || (dim > 2 && periodicity[2] == DM_BOUNDARY_PERIODIC)) {
1413       PetscCall(DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(dm, &layout, vert_z, per_faces, lower, upper, periodicity, donor_face_closure, my_donor_faces));
1414     }
1415     for (PetscInt i = 0; i < 3; i++) {
1416       PetscCall(PetscSegBufferDestroy(&per_faces[i]));
1417       PetscCall(PetscSegBufferDestroy(&donor_face_closure[i]));
1418       PetscCall(PetscSegBufferDestroy(&my_donor_faces[i]));
1419     }
1420   }
1421   PetscCall(PetscFree(layout.zstarts));
1422   PetscCall(PetscFree(vert_z));
1423   PetscCall(PetscLogEventEnd(DMPLEX_CreateBoxSFC, dm, 0, 0, 0));
1424   PetscFunctionReturn(PETSC_SUCCESS);
1425 }
1426 
1427 /*@
1428   DMPlexSetIsoperiodicFaceSF - Express periodicity from an existing mesh
1429 
1430   Logically Collective
1431 
1432   Input Parameters:
1433 + dm           - The `DMPLEX` on which to set periodicity
1434 . num_face_sfs - Number of `PetscSF`s in `face_sfs`
1435 - face_sfs     - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.
1436 
1437   Level: advanced
1438 
1439   Note:
1440   One can use `-dm_plex_shape zbox` to use this mode of periodicity, wherein the periodic points are distinct both globally
1441   and locally, but are paired when creating a global dof space.
1442 
1443 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexGetIsoperiodicFaceSF()`
1444 @*/
1445 PetscErrorCode DMPlexSetIsoperiodicFaceSF(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs)
1446 {
1447   DM_Plex *plex = (DM_Plex *)dm->data;
1448 
1449   PetscFunctionBegin;
1450   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1451   if (num_face_sfs) {
1452     PetscAssertPointer(face_sfs, 3);
1453     PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex));
1454   } else PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", NULL));
1455   if (num_face_sfs == plex->periodic.num_face_sfs && (num_face_sfs == 0 || face_sfs == plex->periodic.face_sfs)) PetscFunctionReturn(PETSC_SUCCESS);
1456   PetscCall(DMSetGlobalSection(dm, NULL));
1457 
1458   for (PetscInt i = 0; i < num_face_sfs; i++) PetscCall(PetscObjectReference((PetscObject)face_sfs[i]));
1459   if (plex->periodic.num_face_sfs > 0) {
1460     for (PetscInt i = 0; i < plex->periodic.num_face_sfs; i++) PetscCall(PetscSFDestroy(&plex->periodic.face_sfs[i]));
1461     PetscCall(PetscFree(plex->periodic.face_sfs));
1462   }
1463 
1464   plex->periodic.num_face_sfs = num_face_sfs;
1465   PetscCall(PetscCalloc1(num_face_sfs, &plex->periodic.face_sfs));
1466   for (PetscInt i = 0; i < num_face_sfs; i++) plex->periodic.face_sfs[i] = face_sfs[i];
1467 
1468   DM cdm = dm->coordinates[0].dm; // Can't DMGetCoordinateDM because it automatically creates one
1469   if (cdm) {
1470     PetscCall(DMPlexSetIsoperiodicFaceSF(cdm, num_face_sfs, face_sfs));
1471     if (face_sfs) cdm->periodic.setup = DMPeriodicCoordinateSetUp_Internal;
1472   }
1473   PetscFunctionReturn(PETSC_SUCCESS);
1474 }
1475 
1476 /*@C
1477   DMPlexGetIsoperiodicFaceSF - Obtain periodicity for a mesh
1478 
1479   Logically Collective
1480 
1481   Input Parameter:
1482 . dm - The `DMPLEX` for which to obtain periodic relation
1483 
1484   Output Parameters:
1485 + num_face_sfs - Number of `PetscSF`s in the array
1486 - face_sfs     - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.
1487 
1488   Level: advanced
1489 
1490 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()`
1491 @*/
1492 PetscErrorCode DMPlexGetIsoperiodicFaceSF(DM dm, PetscInt *num_face_sfs, const PetscSF **face_sfs)
1493 {
1494   PetscBool isPlex;
1495 
1496   PetscFunctionBegin;
1497   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1498   PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMPLEX, &isPlex));
1499   if (isPlex) {
1500     DM_Plex *plex = (DM_Plex *)dm->data;
1501     if (face_sfs) *face_sfs = plex->periodic.face_sfs;
1502     if (num_face_sfs) *num_face_sfs = plex->periodic.num_face_sfs;
1503   } else {
1504     if (face_sfs) *face_sfs = NULL;
1505     if (num_face_sfs) *num_face_sfs = 0;
1506   }
1507   PetscFunctionReturn(PETSC_SUCCESS);
1508 }
1509 
1510 /*@C
1511   DMPlexSetIsoperiodicFaceTransform - set geometric transform from donor to periodic points
1512 
1513   Logically Collective
1514 
1515   Input Parameters:
1516 + dm - `DMPLEX` that has been configured with `DMPlexSetIsoperiodicFaceSF()`
1517 . n  - Number of transforms in array
1518 - t  - Array of 4x4 affine transformation basis.
1519 
1520   Level: advanced
1521 
1522   Notes:
1523   Affine transforms are 4x4 matrices in which the leading 3x3 block expresses a rotation (or identity for no rotation),
1524   the last column contains a translation vector, and the bottom row is all zero except the last entry, which must always
1525   be 1. This representation is common in geometric modeling and allows affine transformations to be composed using
1526   simple matrix multiplication.
1527 
1528   Although the interface accepts a general affine transform, only affine translation is supported at present.
1529 
1530   Developer Notes:
1531   This interface should be replaced by making BasisTransform public, expanding it to support affine representations, and
1532   adding GPU implementations to apply the G2L/L2G transforms.
1533 
1534 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()`
1535 @*/
1536 PetscErrorCode DMPlexSetIsoperiodicFaceTransform(DM dm, PetscInt n, const PetscScalar t[])
1537 {
1538   DM_Plex *plex = (DM_Plex *)dm->data;
1539 
1540   PetscFunctionBegin;
1541   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1542   PetscCheck(n == plex->periodic.num_face_sfs, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Number of transforms (%" PetscInt_FMT ") must equal number of isoperiodc face SFs (%" PetscInt_FMT ")", n, plex->periodic.num_face_sfs);
1543 
1544   PetscCall(PetscFree(plex->periodic.transform));
1545   PetscCall(PetscMalloc1(n, &plex->periodic.transform));
1546   for (PetscInt i = 0; i < n; i++) {
1547     for (PetscInt j = 0; j < 4; j++) {
1548       for (PetscInt k = 0; k < 4; k++) {
1549         PetscCheck(j != k || t[i * 16 + j * 4 + k] == 1., PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Rotated transforms not supported");
1550         plex->periodic.transform[i][j][k] = t[i * 16 + j * 4 + k];
1551       }
1552     }
1553   }
1554   PetscFunctionReturn(PETSC_SUCCESS);
1555 }
1556