xref: /petsc/src/dm/impls/plex/plexsfc.c (revision 2b4f33d9851e665c71c735db5ecd66df92829a75)
1 #include <petsc/private/dmpleximpl.h> /*I   "petscdmplex.h"   I*/
2 #include <petscsf.h>
3 #include <petsc/private/hashset.h>
4 
5 typedef uint64_t ZCode;
6 
7 PETSC_HASH_SET(ZSet, ZCode, PetscHash_UInt64, PetscHashEqual)
8 
9 typedef struct {
10   PetscInt i, j, k;
11 } Ijk;
12 
13 typedef struct {
14   Ijk         eextent;
15   Ijk         vextent;
16   PetscMPIInt comm_size;
17   ZCode      *zstarts;
18 } ZLayout;
19 
20 // ***** Overview of ZCode *******
21 // The SFC uses integer indexing for each dimension and encodes them into a single integer by interleaving the bits of each index.
22 // This is known as Morton encoding, and is refered to as ZCode in this code.
23 // So for index i having bits [i2,i1,i0], and similar for indexes j and k, the ZCode (Morton number) would be:
24 //    [k2,j2,i2,k1,j1,i1,k0,j0,i0]
25 // This encoding allows for easier traversal of the SFC structure (see https://en.wikipedia.org/wiki/Z-order_curve and `ZStepOct()`).
26 // `ZEncode()` is used to go from indices to ZCode, while `ZCodeSplit()` goes from ZCode back to indices.
27 
28 // Decodes the leading interleaved index from a ZCode
29 // e.g. [k2,j2,i2,k1,j1,i1,k0,j0,i0] -> [i2,i1,i0]
30 // Magic numbers taken from https://stackoverflow.com/a/18528775/7564988 (translated to octal)
31 static unsigned ZCodeSplit1(ZCode z)
32 {
33   z &= 0111111111111111111111;
34   z = (z | z >> 2) & 0103030303030303030303;
35   z = (z | z >> 4) & 0100170017001700170017;
36   z = (z | z >> 8) & 0370000037700000377;
37   z = (z | z >> 16) & 0370000000000177777;
38   z = (z | z >> 32) & 07777777;
39   return (unsigned)z;
40 }
41 
42 // Encodes the leading interleaved index from a ZCode
43 // e.g. [i2,i1,i0] -> [0,0,i2,0,0,i1,0,0,i0]
44 static ZCode ZEncode1(unsigned t)
45 {
46   ZCode z = t;
47   z &= 07777777;
48   z = (z | z << 32) & 0370000000000177777;
49   z = (z | z << 16) & 0370000037700000377;
50   z = (z | z << 8) & 0100170017001700170017;
51   z = (z | z << 4) & 0103030303030303030303;
52   z = (z | z << 2) & 0111111111111111111111;
53   return z;
54 }
55 
56 // Decodes i j k indices from a ZCode.
57 // Uses `ZCodeSplit1()` by shifting ZCode so that the leading index is the desired one to decode
58 static Ijk ZCodeSplit(ZCode z)
59 {
60   Ijk c;
61   c.i = ZCodeSplit1(z >> 2);
62   c.j = ZCodeSplit1(z >> 1);
63   c.k = ZCodeSplit1(z >> 0);
64   return c;
65 }
66 
67 // Encodes i j k indices to a ZCode.
68 // Uses `ZCodeEncode1()` by shifting resulting ZCode to the appropriate bit placement
69 static ZCode ZEncode(Ijk c)
70 {
71   ZCode z = (ZEncode1((unsigned int)c.i) << 2) | (ZEncode1((unsigned int)c.j) << 1) | ZEncode1((unsigned int)c.k);
72   return z;
73 }
74 
75 static PetscBool IjkActive(Ijk extent, Ijk l)
76 {
77   if (l.i < extent.i && l.j < extent.j && l.k < extent.k) return PETSC_TRUE;
78   return PETSC_FALSE;
79 }
80 
81 // If z is not the base of an octet (last 3 bits 0), return 0.
82 //
83 // If z is the base of an octet, we recursively grow to the biggest structured octet. This is typically useful when a z
84 // is outside the domain and we wish to skip a (possibly recursively large) octet to find our next interesting point.
85 static ZCode ZStepOct(ZCode z)
86 {
87   if (PetscUnlikely(z == 0)) return 0; // Infinite loop below if z == 0
88   ZCode step = 07;
89   for (; (z & step) == 0; step = (step << 3) | 07) { }
90   return step >> 3;
91 }
92 
93 // Since element/vertex box extents are typically not equal powers of 2, Z codes that lie within the domain are not contiguous.
94 static PetscErrorCode ZLayoutCreate(PetscMPIInt size, const PetscInt eextent[3], const PetscInt vextent[3], ZLayout *layout)
95 {
96   PetscFunctionBegin;
97   layout->eextent.i = eextent[0];
98   layout->eextent.j = eextent[1];
99   layout->eextent.k = eextent[2];
100   layout->vextent.i = vextent[0];
101   layout->vextent.j = vextent[1];
102   layout->vextent.k = vextent[2];
103   layout->comm_size = size;
104   layout->zstarts   = NULL;
105   PetscCall(PetscMalloc1(size + 1, &layout->zstarts));
106 
107   PetscInt total_elems = eextent[0] * eextent[1] * eextent[2];
108   ZCode    z           = 0;
109   layout->zstarts[0]   = 0;
110   // This loop traverses all vertices in the global domain, so is worth making fast. We use ZStepBound
111   for (PetscMPIInt r = 0; r < size; r++) {
112     PetscInt elems_needed = (total_elems / size) + (total_elems % size > r), count;
113     for (count = 0; count < elems_needed; z++) {
114       ZCode skip = ZStepOct(z); // optimistically attempt a longer step
115       for (ZCode s = skip;; s >>= 3) {
116         Ijk trial = ZCodeSplit(z + s);
117         if (IjkActive(layout->eextent, trial)) {
118           while (count + s + 1 > (ZCode)elems_needed) s >>= 3; // Shrink the octet
119           count += s + 1;
120           z += s;
121           break;
122         }
123         if (s == 0) { // the whole skip octet is inactive
124           z += skip;
125           break;
126         }
127       }
128     }
129     // Pick up any extra vertices in the Z ordering before the next rank's first owned element.
130     //
131     // This leads to poorly balanced vertices when eextent is a power of 2, since all the fringe vertices end up
132     // on the last rank. A possible solution is to balance the Z-order vertices independently from the cells, which will
133     // result in a lot of element closures being remote. We could finish marking boundary conditions, then do a round of
134     // vertex ownership smoothing (which would reorder and redistribute vertices without touching element distribution).
135     // Another would be to have an analytic ownership criteria for vertices in the fringe veextent - eextent. This would
136     // complicate the job of identifying an owner and its offset.
137     //
138     // The current recommended approach is to let `-dm_distribute 1` (default) resolve vertex ownership. This is
139     // *mandatory* with isoperiodicity (except in special cases) to remove standed vertices from local spaces. Here's
140     // the issue:
141     //
142     // Consider this partition on rank 0 (left) and rank 1.
143     //
144     //    4 --------  5 -- 14 --10 -- 21 --11
145     //                |          |          |
146     // 7 -- 16 --  8  |          |          |
147     // |           |  3 -------  7 -------  9
148     // |           |             |          |
149     // 4 --------  6 ------ 10   |          |
150     // |           |         |   6 -- 16 -- 8
151     // |           |         |
152     // 3 ---11---  5 --18--  9
153     //
154     // The periodic face SF looks like
155     // [0] Number of roots=21, leaves=1, remote ranks=1
156     // [0] 16 <- (0,11)
157     // [1] Number of roots=22, leaves=2, remote ranks=2
158     // [1] 14 <- (0,18)
159     // [1] 21 <- (1,16)
160     //
161     // In handling face (0,16), rank 0 learns that (0,7) and (0,8) map to (0,3) and (0,5) respectively, thus we won't use
162     // the point SF links to (1,4) and (1,5). Rank 1 learns about the periodic mapping of (1,5) while handling face
163     // (1,14), but never learns that vertex (1,4) has been mapped to (0,3) by face (0,16).
164     //
165     // We can relatively easily inform vertex (1,4) of this mapping, but it stays in rank 1's local space despite not
166     // being in the closure and thus not being contributed to. This would be mostly harmless except that some viewer
167     // routines expect all local points to be somehow significant. It is not easy to analytically remove the (1,4)
168     // vertex because the point SF and isoperiodic face SF would need to be updated to account for removal of the
169     // stranded vertices.
170     for (; z <= ZEncode(layout->vextent); z++) {
171       Ijk loc = ZCodeSplit(z);
172       if (IjkActive(layout->eextent, loc)) break;
173       z += ZStepOct(z);
174     }
175     layout->zstarts[r + 1] = z;
176   }
177   layout->zstarts[size] = ZEncode(layout->vextent);
178   PetscFunctionReturn(PETSC_SUCCESS);
179 }
180 
181 static PetscInt ZLayoutElementsOnRank(const ZLayout *layout, PetscMPIInt rank)
182 {
183   PetscInt remote_elem = 0;
184   for (ZCode rz = layout->zstarts[rank]; rz < layout->zstarts[rank + 1]; rz++) {
185     Ijk loc = ZCodeSplit(rz);
186     if (IjkActive(layout->eextent, loc)) remote_elem++;
187     else rz += ZStepOct(rz);
188   }
189   return remote_elem;
190 }
191 
192 static PetscInt ZCodeFind(ZCode key, PetscInt n, const ZCode X[])
193 {
194   PetscInt lo = 0, hi = n;
195 
196   if (n == 0) return -1;
197   while (hi - lo > 1) {
198     PetscInt mid = lo + (hi - lo) / 2;
199     if (key < X[mid]) hi = mid;
200     else lo = mid;
201   }
202   return key == X[lo] ? lo : -(lo + (key > X[lo]) + 1);
203 }
204 
205 static inline PetscBool IsPointInsideStratum(PetscInt point, PetscInt pStart, PetscInt pEnd)
206 {
207   return (point >= pStart && point < pEnd) ? PETSC_TRUE : PETSC_FALSE;
208 }
209 
210 static PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(DM dm, const ZLayout *layout, const ZCode *vert_z, PetscSegBuffer per_faces[3], const PetscReal *lower, const PetscReal *upper, const DMBoundaryType *periodicity, PetscSegBuffer donor_face_closure[3], PetscSegBuffer my_donor_faces[3])
211 {
212   MPI_Comm    comm;
213   PetscInt    dim, vStart, vEnd;
214   PetscMPIInt size;
215   PetscSF     face_sfs[3];
216   PetscScalar transforms[3][4][4] = {{{0}}};
217 
218   PetscFunctionBegin;
219   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
220   PetscCallMPI(MPI_Comm_size(comm, &size));
221   PetscCall(DMGetDimension(dm, &dim));
222   const PetscInt csize = PetscPowInt(2, dim - 1);
223   PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
224 
225   PetscInt num_directions = 0;
226   for (PetscInt direction = 0; direction < dim; direction++) {
227     PetscCount   num_faces;
228     PetscInt    *faces;
229     ZCode       *donor_verts, *donor_minz;
230     PetscSFNode *leaf;
231     PetscCount   num_multiroots = 0;
232     PetscInt     pStart, pEnd;
233     PetscBool    sorted;
234     PetscInt     inum_faces;
235 
236     if (periodicity[direction] != DM_BOUNDARY_PERIODIC) continue;
237     PetscCall(PetscSegBufferGetSize(per_faces[direction], &num_faces));
238     PetscCall(PetscSegBufferExtractInPlace(per_faces[direction], &faces));
239     PetscCall(PetscSegBufferExtractInPlace(donor_face_closure[direction], &donor_verts));
240     PetscCall(PetscMalloc1(num_faces, &donor_minz));
241     PetscCall(PetscMalloc1(num_faces, &leaf));
242     for (PetscCount i = 0; i < num_faces; i++) {
243       ZCode minz = donor_verts[i * csize];
244 
245       for (PetscInt j = 1; j < csize; j++) minz = PetscMin(minz, donor_verts[i * csize + j]);
246       donor_minz[i] = minz;
247     }
248     PetscCall(PetscIntCast(num_faces, &inum_faces));
249     PetscCall(PetscSortedInt64(inum_faces, (const PetscInt64 *)donor_minz, &sorted));
250     // If a donor vertex were chosen to broker multiple faces, we would have a logic error.
251     // Checking for sorting is a cheap check that there are no duplicates.
252     PetscCheck(sorted, PETSC_COMM_SELF, PETSC_ERR_PLIB, "minz not sorted; possible duplicates not checked");
253     for (PetscCount i = 0; i < num_faces;) {
254       ZCode       z = donor_minz[i];
255       PetscMPIInt remote_rank, remote_count = 0;
256 
257       PetscCall(PetscMPIIntCast(ZCodeFind(z, size + 1, layout->zstarts), &remote_rank));
258       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
259       // Process all the vertices on this rank
260       for (ZCode rz = layout->zstarts[remote_rank]; rz < layout->zstarts[remote_rank + 1]; rz++) {
261         Ijk loc = ZCodeSplit(rz);
262 
263         if (rz == z) {
264           leaf[i].rank  = remote_rank;
265           leaf[i].index = remote_count;
266           i++;
267           if (i == num_faces) break;
268           z = donor_minz[i];
269         }
270         if (IjkActive(layout->vextent, loc)) remote_count++;
271       }
272     }
273     PetscCall(PetscFree(donor_minz));
274     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &face_sfs[num_directions]));
275     PetscCall(PetscSFSetGraph(face_sfs[num_directions], vEnd - vStart, inum_faces, NULL, PETSC_USE_POINTER, leaf, PETSC_USE_POINTER));
276     const PetscInt *my_donor_degree;
277     PetscCall(PetscSFComputeDegreeBegin(face_sfs[num_directions], &my_donor_degree));
278     PetscCall(PetscSFComputeDegreeEnd(face_sfs[num_directions], &my_donor_degree));
279 
280     for (PetscInt i = 0; i < vEnd - vStart; i++) {
281       num_multiroots += my_donor_degree[i];
282       if (my_donor_degree[i] == 0) continue;
283       PetscAssert(my_donor_degree[i] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex has multiple faces");
284     }
285     PetscInt  *my_donors, *donor_indices, *my_donor_indices;
286     PetscCount num_my_donors;
287 
288     PetscCall(PetscSegBufferGetSize(my_donor_faces[direction], &num_my_donors));
289     PetscCheck(num_my_donors == num_multiroots, PETSC_COMM_SELF, PETSC_ERR_SUP, "Donor request (%" PetscCount_FMT ") does not match expected donors (%" PetscCount_FMT ")", num_my_donors, num_multiroots);
290     PetscCall(PetscSegBufferExtractInPlace(my_donor_faces[direction], &my_donors));
291     PetscCall(PetscMalloc1(vEnd - vStart, &my_donor_indices));
292     for (PetscCount i = 0; i < num_my_donors; i++) {
293       PetscInt f = my_donors[i];
294       PetscInt num_points, *points = NULL, minv = PETSC_INT_MAX;
295 
296       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
297       for (PetscInt j = 0; j < num_points; j++) {
298         PetscInt p = points[2 * j];
299         if (!IsPointInsideStratum(p, vStart, vEnd)) continue;
300         minv = PetscMin(minv, p);
301       }
302       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
303       PetscAssert(my_donor_degree[minv - vStart] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex not requested");
304       my_donor_indices[minv - vStart] = f;
305     }
306     PetscCall(PetscMalloc1(num_faces, &donor_indices));
307     PetscCall(PetscSFBcastBegin(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
308     PetscCall(PetscSFBcastEnd(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
309     PetscCall(PetscFree(my_donor_indices));
310     // Modify our leafs so they point to donor faces instead of donor minz. Additionally, give them indices as faces.
311     for (PetscCount i = 0; i < num_faces; i++) leaf[i].index = donor_indices[i];
312     PetscCall(PetscFree(donor_indices));
313     PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
314     PetscCall(PetscSFSetGraph(face_sfs[num_directions], pEnd - pStart, inum_faces, faces, PETSC_COPY_VALUES, leaf, PETSC_OWN_POINTER));
315     {
316       char face_sf_name[PETSC_MAX_PATH_LEN];
317       PetscCall(PetscSNPrintf(face_sf_name, sizeof face_sf_name, "Z-order Isoperiodic Faces #%" PetscInt_FMT, num_directions));
318       PetscCall(PetscObjectSetName((PetscObject)face_sfs[num_directions], face_sf_name));
319     }
320 
321     transforms[num_directions][0][0]         = 1;
322     transforms[num_directions][1][1]         = 1;
323     transforms[num_directions][2][2]         = 1;
324     transforms[num_directions][3][3]         = 1;
325     transforms[num_directions][direction][3] = upper[direction] - lower[direction];
326     num_directions++;
327   }
328 
329   PetscCall(DMPlexSetIsoperiodicFaceSF(dm, num_directions, face_sfs));
330   PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, num_directions, (PetscScalar *)transforms));
331 
332   for (PetscInt i = 0; i < num_directions; i++) PetscCall(PetscSFDestroy(&face_sfs[i]));
333   PetscFunctionReturn(PETSC_SUCCESS);
334 }
335 
336 // This is a DMGlobalToLocalHook that applies the affine offsets. When extended for rotated periodicity, it'll need to
337 // apply a rotatonal transform and similar operations will be needed for fields (e.g., to rotate a velocity vector).
338 // We use this crude approach here so we don't have to write new GPU kernels yet.
339 static PetscErrorCode DMCoordAddPeriodicOffsets_Private(DM dm, Vec g, InsertMode mode, Vec l, void *ctx)
340 {
341   PetscFunctionBegin;
342   // These `VecScatter`s should be merged to improve efficiency; the scatters cannot be overlapped.
343   for (PetscInt i = 0; i < dm->periodic.num_affines; i++) {
344     PetscCall(VecScatterBegin(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD));
345     PetscCall(VecScatterEnd(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD));
346   }
347   PetscFunctionReturn(PETSC_SUCCESS);
348 }
349 
350 // Creates SF to communicate data from donor to periodic faces. The data can be different sizes per donor-periodic pair and is given in `point_sizes[]`
351 PetscErrorCode CreateDonorToPeriodicSF(DM dm, PetscSF face_sf, PetscInt pStart, PetscInt pEnd, const PetscInt point_sizes[], PetscInt *rootbuffersize, PetscInt *leafbuffersize, PetscBT *rootbt, PetscSF *sf_closure)
352 {
353   MPI_Comm           comm;
354   PetscMPIInt        rank;
355   PetscInt           nroots, nleaves;
356   PetscInt          *rootdata, *leafdata;
357   const PetscInt    *filocal;
358   const PetscSFNode *firemote;
359 
360   PetscFunctionBeginUser;
361   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
362   PetscCallMPI(MPI_Comm_rank(comm, &rank));
363 
364   PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, &firemote));
365   PetscCall(PetscCalloc2(2 * nroots, &rootdata, 2 * nroots, &leafdata));
366   for (PetscInt i = 0; i < nleaves; i++) {
367     PetscInt point = filocal[i];
368     PetscCheck(IsPointInsideStratum(point, pStart, pEnd), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Point %" PetscInt_FMT " in leaves exists outside of stratum [%" PetscInt_FMT ", %" PetscInt_FMT ")", point, pStart, pEnd);
369     leafdata[point] = point_sizes[point - pStart];
370   }
371   PetscCall(PetscSFReduceBegin(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
372   PetscCall(PetscSFReduceEnd(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
373 
374   PetscInt root_offset = 0;
375   PetscCall(PetscBTCreate(nroots, rootbt));
376   for (PetscInt p = 0; p < nroots; p++) {
377     const PetscInt *donor_dof = rootdata + nroots;
378     if (donor_dof[p] == 0) {
379       rootdata[2 * p]     = -1;
380       rootdata[2 * p + 1] = -1;
381       continue;
382     }
383     PetscCall(PetscBTSet(*rootbt, p));
384     PetscCheck(IsPointInsideStratum(p, pStart, pEnd), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Point %" PetscInt_FMT " in roots exists outside of stratum [%" PetscInt_FMT ", %" PetscInt_FMT ")", p, pStart, pEnd);
385     PetscInt p_size = point_sizes[p - pStart];
386     PetscCheck(donor_dof[p] == p_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Reduced leaf data size (%" PetscInt_FMT ") does not match root data size (%" PetscInt_FMT ")", donor_dof[p], p_size);
387     rootdata[2 * p]     = root_offset;
388     rootdata[2 * p + 1] = p_size;
389     root_offset += p_size;
390   }
391   PetscCall(PetscSFBcastBegin(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
392   PetscCall(PetscSFBcastEnd(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
393   // Count how many leaves we need to communicate the closures
394   PetscInt leaf_offset = 0;
395   for (PetscInt i = 0; i < nleaves; i++) {
396     PetscInt point = filocal[i];
397     if (leafdata[2 * point + 1] < 0) continue;
398     leaf_offset += leafdata[2 * point + 1];
399   }
400 
401   PetscSFNode *closure_leaf;
402   PetscCall(PetscMalloc1(leaf_offset, &closure_leaf));
403   leaf_offset = 0;
404   for (PetscInt i = 0; i < nleaves; i++) {
405     PetscInt point   = filocal[i];
406     PetscInt cl_size = leafdata[2 * point + 1];
407     if (cl_size < 0) continue;
408     for (PetscInt j = 0; j < cl_size; j++) {
409       closure_leaf[leaf_offset].rank  = firemote[i].rank;
410       closure_leaf[leaf_offset].index = leafdata[2 * point] + j;
411       leaf_offset++;
412     }
413   }
414 
415   PetscCall(PetscSFCreate(comm, sf_closure));
416   PetscCall(PetscSFSetGraph(*sf_closure, root_offset, leaf_offset, NULL, PETSC_USE_POINTER, closure_leaf, PETSC_OWN_POINTER));
417   *rootbuffersize = root_offset;
418   *leafbuffersize = leaf_offset;
419   PetscCall(PetscFree2(rootdata, leafdata));
420   PetscFunctionReturn(PETSC_SUCCESS);
421 }
422 
423 // Start with an SF for a positive depth (e.g., faces) and create a new SF for matched closure. The caller must ensure
424 // that both the donor (root) face and the periodic (leaf) face have consistent orientation, meaning that their closures
425 // are isomorphic. It may be useful/necessary for this restriction to be loosened.
426 //
427 // Output Arguments:
428 //
429 // + closure_sf - augmented point SF (see `DMGetPointSF()`) that includes the faces and all points in its closure. This
430 //   can be used to create a global section and section SF.
431 // - is_points - array of index sets for just the points in the closure of `face_sf`. These may be used to apply an affine
432 //   transformation to periodic dofs; see DMPeriodicCoordinateSetUp_Internal().
433 //
434 static PetscErrorCode DMPlexCreateIsoperiodicPointSF_Private(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs, PetscSF *closure_sf, IS **is_points)
435 {
436   MPI_Comm           comm;
437   PetscMPIInt        rank;
438   PetscSF            point_sf;
439   PetscInt           nroots, nleaves;
440   const PetscInt    *filocal;
441   const PetscSFNode *firemote;
442 
443   PetscFunctionBegin;
444   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
445   PetscCallMPI(MPI_Comm_rank(comm, &rank));
446   PetscCall(DMGetPointSF(dm, &point_sf)); // Point SF has remote points
447   PetscCall(PetscMalloc1(num_face_sfs, is_points));
448 
449   for (PetscInt f = 0; f < num_face_sfs; f++) {
450     PetscSF   face_sf = face_sfs[f];
451     PetscInt *cl_sizes;
452     PetscInt  fStart, fEnd, rootbuffersize, leafbuffersize;
453     PetscSF   sf_closure;
454     PetscBT   rootbt;
455 
456     PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
457     PetscCall(PetscMalloc1(fEnd - fStart, &cl_sizes));
458     for (PetscInt f = fStart, index = 0; f < fEnd; f++, index++) {
459       PetscInt cl_size, *closure = NULL;
460       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &cl_size, &closure));
461       cl_sizes[index] = cl_size - 1;
462       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &cl_size, &closure));
463     }
464 
465     PetscCall(CreateDonorToPeriodicSF(dm, face_sf, fStart, fEnd, cl_sizes, &rootbuffersize, &leafbuffersize, &rootbt, &sf_closure));
466     PetscCall(PetscFree(cl_sizes));
467     PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, &firemote));
468 
469     PetscSFNode *leaf_donor_closure;
470     { // Pack root buffer with owner for every point in the root cones
471       PetscSFNode       *donor_closure;
472       const PetscInt    *pilocal;
473       const PetscSFNode *piremote;
474       PetscInt           npoints;
475 
476       PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, &pilocal, &piremote));
477       PetscCall(PetscCalloc1(rootbuffersize, &donor_closure));
478       for (PetscInt p = 0, root_offset = 0; p < nroots; p++) {
479         if (!PetscBTLookup(rootbt, p)) continue;
480         PetscInt cl_size, *closure = NULL;
481         PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
482         for (PetscInt j = 1; j < cl_size; j++) {
483           PetscInt c = closure[2 * j];
484           if (pilocal) {
485             PetscInt found = -1;
486             if (npoints > 0) PetscCall(PetscFindInt(c, npoints, pilocal, &found));
487             if (found >= 0) {
488               donor_closure[root_offset++] = piremote[found];
489               continue;
490             }
491           }
492           // we own c
493           donor_closure[root_offset].rank  = rank;
494           donor_closure[root_offset].index = c;
495           root_offset++;
496         }
497         PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
498       }
499 
500       PetscCall(PetscMalloc1(leafbuffersize, &leaf_donor_closure));
501       PetscCall(PetscSFBcastBegin(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE));
502       PetscCall(PetscSFBcastEnd(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE));
503       PetscCall(PetscSFDestroy(&sf_closure));
504       PetscCall(PetscFree(donor_closure));
505     }
506 
507     PetscSFNode *new_iremote;
508     PetscCall(PetscCalloc1(nroots, &new_iremote));
509     for (PetscInt i = 0; i < nroots; i++) new_iremote[i].rank = -1;
510     // Walk leaves and match vertices
511     for (PetscInt i = 0, leaf_offset = 0; i < nleaves; i++) {
512       PetscInt  point   = filocal[i], cl_size;
513       PetscInt *closure = NULL;
514       PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
515       for (PetscInt j = 1; j < cl_size; j++) { // TODO: should we send donor edge orientations so we can flip for consistency?
516         PetscInt    c  = closure[2 * j];
517         PetscSFNode lc = leaf_donor_closure[leaf_offset];
518         // printf("[%d] face %d.%d: %d ?-- (%d,%d)\n", rank, point, j, c, lc.rank, lc.index);
519         if (new_iremote[c].rank == -1) {
520           new_iremote[c] = lc;
521         } else PetscCheck(new_iremote[c].rank == lc.rank && new_iremote[c].index == lc.index, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Mismatched cone ordering between faces");
522         leaf_offset++;
523       }
524       PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
525     }
526     PetscCall(PetscFree(leaf_donor_closure));
527 
528     // Include face points in closure SF
529     for (PetscInt i = 0; i < nleaves; i++) new_iremote[filocal[i]] = firemote[i];
530     // consolidate leaves
531     PetscInt *leafdata;
532     PetscCall(PetscMalloc1(nroots, &leafdata));
533     PetscInt num_new_leaves = 0;
534     for (PetscInt i = 0; i < nroots; i++) {
535       if (new_iremote[i].rank == -1) continue;
536       new_iremote[num_new_leaves] = new_iremote[i];
537       leafdata[num_new_leaves]    = i;
538       num_new_leaves++;
539     }
540     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, num_new_leaves, leafdata, PETSC_COPY_VALUES, &(*is_points)[f]));
541 
542     PetscSF csf;
543     PetscCall(PetscSFCreate(comm, &csf));
544     PetscCall(PetscSFSetGraph(csf, nroots, num_new_leaves, leafdata, PETSC_COPY_VALUES, new_iremote, PETSC_COPY_VALUES));
545     PetscCall(PetscFree(new_iremote)); // copy and delete because new_iremote is longer than it needs to be
546     PetscCall(PetscFree(leafdata));
547     PetscCall(PetscBTDestroy(&rootbt));
548 
549     PetscInt npoints;
550     PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, NULL, NULL));
551     if (npoints < 0) { // empty point_sf
552       *closure_sf = csf;
553     } else {
554       PetscCall(PetscSFMerge(point_sf, csf, closure_sf));
555       PetscCall(PetscSFDestroy(&csf));
556     }
557     if (f > 0) PetscCall(PetscSFDestroy(&point_sf)); // Only destroy if point_sf is from previous calls to PetscSFMerge
558     point_sf = *closure_sf;                          // Use combined point + isoperiodic SF to define point ownership for further face_sf
559   }
560   PetscCall(PetscObjectSetName((PetscObject)*closure_sf, "Composed Periodic Points"));
561   PetscFunctionReturn(PETSC_SUCCESS);
562 }
563 
564 static PetscErrorCode DMGetIsoperiodicPointSF_Plex(DM dm, PetscSF *sf)
565 {
566   DM_Plex *plex = (DM_Plex *)dm->data;
567 
568   PetscFunctionBegin;
569   if (!plex->periodic.composed_sf) PetscCall(DMPlexCreateIsoperiodicPointSF_Private(dm, plex->periodic.num_face_sfs, plex->periodic.face_sfs, &plex->periodic.composed_sf, &plex->periodic.periodic_points));
570   if (sf) *sf = plex->periodic.composed_sf;
571   PetscFunctionReturn(PETSC_SUCCESS);
572 }
573 
574 PetscErrorCode DMPlexMigrateIsoperiodicFaceSF_Internal(DM old_dm, DM dm, PetscSF sf_migration)
575 {
576   DM_Plex    *plex = (DM_Plex *)old_dm->data;
577   PetscSF     sf_point, *new_face_sfs;
578   PetscMPIInt rank;
579 
580   PetscFunctionBegin;
581   if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
582   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
583   PetscCall(DMGetPointSF(dm, &sf_point));
584   PetscCall(PetscMalloc1(plex->periodic.num_face_sfs, &new_face_sfs));
585 
586   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) {
587     PetscInt           old_npoints, new_npoints, old_nleaf, new_nleaf, point_nleaf;
588     PetscSFNode       *new_leafdata, *rootdata, *leafdata;
589     const PetscInt    *old_local, *point_local;
590     const PetscSFNode *old_remote, *point_remote;
591 
592     PetscCall(PetscSFGetGraph(plex->periodic.face_sfs[f], &old_npoints, &old_nleaf, &old_local, &old_remote));
593     PetscCall(PetscSFGetGraph(sf_migration, NULL, &new_nleaf, NULL, NULL));
594     PetscCall(PetscSFGetGraph(sf_point, &new_npoints, &point_nleaf, &point_local, &point_remote));
595     PetscAssert(new_nleaf == new_npoints, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected migration leaf space to match new point root space");
596     PetscCall(PetscMalloc3(old_npoints, &rootdata, old_npoints, &leafdata, new_npoints, &new_leafdata));
597 
598     // Fill new_leafdata with new owners of all points
599     for (PetscInt i = 0; i < new_npoints; i++) {
600       new_leafdata[i].rank  = rank;
601       new_leafdata[i].index = i;
602     }
603     for (PetscInt i = 0; i < point_nleaf; i++) {
604       PetscInt j      = point_local[i];
605       new_leafdata[j] = point_remote[i];
606     }
607     // REPLACE is okay because every leaf agrees about the new owners
608     PetscCall(PetscSFReduceBegin(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE));
609     PetscCall(PetscSFReduceEnd(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE));
610     // rootdata now contains the new owners
611 
612     // Send to leaves of old space
613     for (PetscInt i = 0; i < old_npoints; i++) {
614       leafdata[i].rank  = -1;
615       leafdata[i].index = -1;
616     }
617     PetscCall(PetscSFBcastBegin(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE));
618     PetscCall(PetscSFBcastEnd(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE));
619 
620     // Send to new leaf space
621     PetscCall(PetscSFBcastBegin(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE));
622     PetscCall(PetscSFBcastEnd(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE));
623 
624     PetscInt     nface = 0, *new_local;
625     PetscSFNode *new_remote;
626     for (PetscInt i = 0; i < new_npoints; i++) nface += (new_leafdata[i].rank >= 0);
627     PetscCall(PetscMalloc1(nface, &new_local));
628     PetscCall(PetscMalloc1(nface, &new_remote));
629     nface = 0;
630     for (PetscInt i = 0; i < new_npoints; i++) {
631       if (new_leafdata[i].rank == -1) continue;
632       new_local[nface]  = i;
633       new_remote[nface] = new_leafdata[i];
634       nface++;
635     }
636     PetscCall(PetscFree3(rootdata, leafdata, new_leafdata));
637     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &new_face_sfs[f]));
638     PetscCall(PetscSFSetGraph(new_face_sfs[f], new_npoints, nface, new_local, PETSC_OWN_POINTER, new_remote, PETSC_OWN_POINTER));
639     {
640       char new_face_sf_name[PETSC_MAX_PATH_LEN];
641       PetscCall(PetscSNPrintf(new_face_sf_name, sizeof new_face_sf_name, "Migrated Isoperiodic Faces #%" PetscInt_FMT, f));
642       PetscCall(PetscObjectSetName((PetscObject)new_face_sfs[f], new_face_sf_name));
643     }
644   }
645 
646   PetscCall(DMPlexSetIsoperiodicFaceSF(dm, plex->periodic.num_face_sfs, new_face_sfs));
647   PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, plex->periodic.num_face_sfs, (PetscScalar *)plex->periodic.transform));
648   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) PetscCall(PetscSFDestroy(&new_face_sfs[f]));
649   PetscCall(PetscFree(new_face_sfs));
650   PetscFunctionReturn(PETSC_SUCCESS);
651 }
652 
653 PetscErrorCode DMPeriodicCoordinateSetUp_Internal(DM dm)
654 {
655   DM_Plex   *plex = (DM_Plex *)dm->data;
656   PetscCount count;
657   IS         isdof;
658   PetscInt   dim;
659 
660   PetscFunctionBegin;
661   if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
662   PetscCall(DMGetIsoperiodicPointSF_Plex(dm, NULL));
663   PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex));
664 
665   PetscCall(DMGetDimension(dm, &dim));
666   dm->periodic.num_affines = plex->periodic.num_face_sfs;
667   PetscCall(PetscMalloc2(dm->periodic.num_affines, &dm->periodic.affine_to_local, dm->periodic.num_affines, &dm->periodic.affine));
668 
669   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) {
670     PetscInt        npoints, vsize, isize;
671     const PetscInt *points;
672     IS              is = plex->periodic.periodic_points[f];
673     PetscSegBuffer  seg;
674     PetscSection    section;
675     PetscInt       *ind;
676     Vec             L, P;
677     VecType         vec_type;
678     VecScatter      scatter;
679     PetscScalar    *x;
680 
681     PetscCall(DMGetLocalSection(dm, &section));
682     PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 32, &seg));
683     PetscCall(ISGetSize(is, &npoints));
684     PetscCall(ISGetIndices(is, &points));
685     for (PetscInt i = 0; i < npoints; i++) {
686       PetscInt point = points[i], off, dof;
687 
688       PetscCall(PetscSectionGetOffset(section, point, &off));
689       PetscCall(PetscSectionGetDof(section, point, &dof));
690       PetscAssert(dof % dim == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected dof %" PetscInt_FMT " not divisible by dimension %" PetscInt_FMT, dof, dim);
691       for (PetscInt j = 0; j < dof / dim; j++) {
692         PetscInt *slot;
693 
694         PetscCall(PetscSegBufferGetInts(seg, 1, &slot));
695         *slot = off / dim + j;
696       }
697     }
698     PetscCall(PetscSegBufferGetSize(seg, &count));
699     PetscCall(PetscSegBufferExtractAlloc(seg, &ind));
700     PetscCall(PetscSegBufferDestroy(&seg));
701     PetscCall(PetscIntCast(count, &isize));
702     PetscCall(ISCreateBlock(PETSC_COMM_SELF, dim, isize, ind, PETSC_OWN_POINTER, &isdof));
703 
704     PetscCall(PetscIntCast(count * dim, &vsize));
705     PetscCall(DMGetLocalVector(dm, &L));
706     PetscCall(VecCreate(PETSC_COMM_SELF, &P));
707     PetscCall(VecSetSizes(P, vsize, vsize));
708     PetscCall(VecGetType(L, &vec_type));
709     PetscCall(VecSetType(P, vec_type));
710     PetscCall(VecScatterCreate(P, NULL, L, isdof, &scatter));
711     PetscCall(DMRestoreLocalVector(dm, &L));
712     PetscCall(ISDestroy(&isdof));
713 
714     PetscCall(VecGetArrayWrite(P, &x));
715     for (PetscCount i = 0; i < count; i++) {
716       for (PetscInt j = 0; j < dim; j++) x[i * dim + j] = plex->periodic.transform[f][j][3];
717     }
718     PetscCall(VecRestoreArrayWrite(P, &x));
719 
720     dm->periodic.affine_to_local[f] = scatter;
721     dm->periodic.affine[f]          = P;
722   }
723   PetscCall(DMGlobalToLocalHookAdd(dm, NULL, DMCoordAddPeriodicOffsets_Private, NULL));
724   PetscFunctionReturn(PETSC_SUCCESS);
725 }
726 
727 // We'll just orient all the edges, though only periodic boundary edges need orientation
728 static PetscErrorCode DMPlexOrientPositiveEdges_Private(DM dm)
729 {
730   PetscInt dim, eStart, eEnd;
731 
732   PetscFunctionBegin;
733   PetscCall(DMGetDimension(dm, &dim));
734   if (dim < 3) PetscFunctionReturn(PETSC_SUCCESS); // not necessary
735   PetscCall(DMPlexGetDepthStratum(dm, 1, &eStart, &eEnd));
736   for (PetscInt e = eStart; e < eEnd; e++) {
737     const PetscInt *cone;
738     PetscCall(DMPlexGetCone(dm, e, &cone));
739     if (cone[0] > cone[1]) PetscCall(DMPlexOrientPoint(dm, e, -1));
740   }
741   PetscFunctionReturn(PETSC_SUCCESS);
742 }
743 
744 PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Internal(DM dm, PetscInt dim, const PetscInt faces[], const PetscReal lower[], const PetscReal upper[], const DMBoundaryType periodicity[], PetscBool interpolate)
745 {
746   PetscInt  eextent[3] = {1, 1, 1}, vextent[3] = {1, 1, 1};
747   const Ijk closure_1[] = {
748     {0, 0, 0},
749     {1, 0, 0},
750   };
751   const Ijk closure_2[] = {
752     {0, 0, 0},
753     {1, 0, 0},
754     {1, 1, 0},
755     {0, 1, 0},
756   };
757   const Ijk closure_3[] = {
758     {0, 0, 0},
759     {0, 1, 0},
760     {1, 1, 0},
761     {1, 0, 0},
762     {0, 0, 1},
763     {1, 0, 1},
764     {1, 1, 1},
765     {0, 1, 1},
766   };
767   const Ijk *const closure_dim[] = {NULL, closure_1, closure_2, closure_3};
768   // This must be kept consistent with DMPlexCreateCubeMesh_Internal
769   const PetscInt        face_marker_1[]   = {1, 2};
770   const PetscInt        face_marker_2[]   = {4, 2, 1, 3};
771   const PetscInt        face_marker_3[]   = {6, 5, 3, 4, 1, 2};
772   const PetscInt *const face_marker_dim[] = {NULL, face_marker_1, face_marker_2, face_marker_3};
773   // Orient faces so the normal is in the positive axis and the first vertex is the one closest to zero.
774   // These orientations can be determined by examining cones of a reference quad and hex element.
775   const PetscInt        face_orient_1[]   = {0, 0};
776   const PetscInt        face_orient_2[]   = {-1, 0, 0, -1};
777   const PetscInt        face_orient_3[]   = {-2, 0, -2, 1, -2, 0};
778   const PetscInt *const face_orient_dim[] = {NULL, face_orient_1, face_orient_2, face_orient_3};
779 
780   PetscFunctionBegin;
781   PetscCall(PetscLogEventBegin(DMPLEX_CreateBoxSFC, dm, 0, 0, 0));
782   PetscAssertPointer(dm, 1);
783   PetscValidLogicalCollectiveInt(dm, dim, 2);
784   PetscCall(DMSetDimension(dm, dim));
785   PetscMPIInt rank, size;
786   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size));
787   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
788   for (PetscInt i = 0; i < dim; i++) {
789     eextent[i] = faces[i];
790     vextent[i] = faces[i] + 1;
791   }
792   ZLayout layout;
793   PetscCall(ZLayoutCreate(size, eextent, vextent, &layout));
794   PetscZSet vset; // set of all vertices in the closure of the owned elements
795   PetscCall(PetscZSetCreate(&vset));
796   PetscInt local_elems = 0;
797   for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
798     Ijk loc = ZCodeSplit(z);
799     if (IjkActive(layout.vextent, loc)) PetscCall(PetscZSetAdd(vset, z));
800     else {
801       z += ZStepOct(z);
802       continue;
803     }
804     if (IjkActive(layout.eextent, loc)) {
805       local_elems++;
806       // Add all neighboring vertices to set
807       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
808         Ijk   inc  = closure_dim[dim][n];
809         Ijk   nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
810         ZCode v    = ZEncode(nloc);
811         PetscCall(PetscZSetAdd(vset, v));
812       }
813     }
814   }
815   PetscInt local_verts, off = 0;
816   ZCode   *vert_z;
817   PetscCall(PetscZSetGetSize(vset, &local_verts));
818   PetscCall(PetscMalloc1(local_verts, &vert_z));
819   PetscCall(PetscZSetGetElems(vset, &off, vert_z));
820   PetscCall(PetscZSetDestroy(&vset));
821   // ZCode is unsigned for bitwise convenience, but highest bit should never be set, so can interpret as signed
822   PetscCall(PetscSortInt64(local_verts, (PetscInt64 *)vert_z));
823 
824   PetscCall(DMPlexSetChart(dm, 0, local_elems + local_verts));
825   for (PetscInt e = 0; e < local_elems; e++) PetscCall(DMPlexSetConeSize(dm, e, PetscPowInt(2, dim)));
826   PetscCall(DMSetUp(dm));
827   {
828     PetscInt e = 0;
829     for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
830       Ijk loc = ZCodeSplit(z);
831       if (!IjkActive(layout.eextent, loc)) {
832         z += ZStepOct(z);
833         continue;
834       }
835       PetscInt cone[8], orient[8] = {0};
836       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
837         Ijk      inc  = closure_dim[dim][n];
838         Ijk      nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
839         ZCode    v    = ZEncode(nloc);
840         PetscInt ci   = ZCodeFind(v, local_verts, vert_z);
841         PetscAssert(ci >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Did not find neighbor vertex in set");
842         cone[n] = local_elems + ci;
843       }
844       PetscCall(DMPlexSetCone(dm, e, cone));
845       PetscCall(DMPlexSetConeOrientation(dm, e, orient));
846       e++;
847     }
848   }
849 
850   PetscCall(DMPlexSymmetrize(dm));
851   PetscCall(DMPlexStratify(dm));
852 
853   { // Create point SF
854     PetscSF sf;
855     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &sf));
856     PetscInt owned_verts = ZCodeFind(layout.zstarts[rank + 1], local_verts, vert_z);
857     if (owned_verts < 0) owned_verts = -(owned_verts + 1); // We don't care whether the key was found
858     PetscInt     num_ghosts = local_verts - owned_verts;   // Due to sorting, owned vertices always come first
859     PetscInt    *local_ghosts;
860     PetscSFNode *ghosts;
861     PetscCall(PetscMalloc1(num_ghosts, &local_ghosts));
862     PetscCall(PetscMalloc1(num_ghosts, &ghosts));
863     for (PetscInt i = 0; i < num_ghosts;) {
864       ZCode       z = vert_z[owned_verts + i];
865       PetscMPIInt remote_rank, remote_count = 0;
866 
867       PetscCall(PetscMPIIntCast(ZCodeFind(z, size + 1, layout.zstarts), &remote_rank));
868       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
869       // We have a new remote rank; find all the ghost indices (which are contiguous in vert_z)
870 
871       // Count the elements on remote_rank
872       PetscInt remote_elem = ZLayoutElementsOnRank(&layout, remote_rank);
873 
874       // Traverse vertices and make ghost links
875       for (ZCode rz = layout.zstarts[remote_rank]; rz < layout.zstarts[remote_rank + 1]; rz++) {
876         Ijk loc = ZCodeSplit(rz);
877         if (rz == z) {
878           local_ghosts[i] = local_elems + owned_verts + i;
879           ghosts[i].rank  = remote_rank;
880           ghosts[i].index = remote_elem + remote_count;
881           i++;
882           if (i == num_ghosts) break;
883           z = vert_z[owned_verts + i];
884         }
885         if (IjkActive(layout.vextent, loc)) remote_count++;
886         else rz += ZStepOct(rz);
887       }
888     }
889     PetscCall(PetscSFSetGraph(sf, local_elems + local_verts, num_ghosts, local_ghosts, PETSC_OWN_POINTER, ghosts, PETSC_OWN_POINTER));
890     PetscCall(PetscObjectSetName((PetscObject)sf, "SFC Point SF"));
891     PetscCall(DMSetPointSF(dm, sf));
892     PetscCall(PetscSFDestroy(&sf));
893   }
894   {
895     Vec          coordinates;
896     PetscScalar *coords;
897     PetscSection coord_section;
898     PetscInt     coord_size;
899     PetscCall(DMGetCoordinateSection(dm, &coord_section));
900     PetscCall(PetscSectionSetNumFields(coord_section, 1));
901     PetscCall(PetscSectionSetFieldComponents(coord_section, 0, dim));
902     PetscCall(PetscSectionSetChart(coord_section, local_elems, local_elems + local_verts));
903     for (PetscInt v = 0; v < local_verts; v++) {
904       PetscInt point = local_elems + v;
905       PetscCall(PetscSectionSetDof(coord_section, point, dim));
906       PetscCall(PetscSectionSetFieldDof(coord_section, point, 0, dim));
907     }
908     PetscCall(PetscSectionSetUp(coord_section));
909     PetscCall(PetscSectionGetStorageSize(coord_section, &coord_size));
910     PetscCall(VecCreate(PETSC_COMM_SELF, &coordinates));
911     PetscCall(PetscObjectSetName((PetscObject)coordinates, "coordinates"));
912     PetscCall(VecSetSizes(coordinates, coord_size, PETSC_DETERMINE));
913     PetscCall(VecSetBlockSize(coordinates, dim));
914     PetscCall(VecSetType(coordinates, VECSTANDARD));
915     PetscCall(VecGetArray(coordinates, &coords));
916     for (PetscInt v = 0; v < local_verts; v++) {
917       Ijk loc             = ZCodeSplit(vert_z[v]);
918       coords[v * dim + 0] = lower[0] + loc.i * (upper[0] - lower[0]) / layout.eextent.i;
919       if (dim > 1) coords[v * dim + 1] = lower[1] + loc.j * (upper[1] - lower[1]) / layout.eextent.j;
920       if (dim > 2) coords[v * dim + 2] = lower[2] + loc.k * (upper[2] - lower[2]) / layout.eextent.k;
921     }
922     PetscCall(VecRestoreArray(coordinates, &coords));
923     PetscCall(DMSetCoordinatesLocal(dm, coordinates));
924     PetscCall(VecDestroy(&coordinates));
925   }
926   if (interpolate) {
927     PetscCall(DMPlexInterpolateInPlace_Internal(dm));
928     // It's currently necessary to orient the donor and periodic edges consistently. An easy way to ensure that is ot
929     // give all edges positive orientation. Since vertices are created in Z-order, all ranks will agree about the
930     // ordering cone[0] < cone[1]. This is overkill and it would be nice to remove this preparation and make
931     // DMPlexCreateIsoperiodicClosureSF_Private() more resilient, so it fixes any inconsistent orientations. That might
932     // be needed in a general CGNS reader, for example.
933     PetscCall(DMPlexOrientPositiveEdges_Private(dm));
934 
935     DMLabel label;
936     PetscCall(DMCreateLabel(dm, "Face Sets"));
937     PetscCall(DMGetLabel(dm, "Face Sets", &label));
938     PetscSegBuffer per_faces[3], donor_face_closure[3], my_donor_faces[3];
939     for (PetscInt i = 0; i < 3; i++) {
940       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &per_faces[i]));
941       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &my_donor_faces[i]));
942       PetscCall(PetscSegBufferCreate(sizeof(ZCode), 64 * PetscPowInt(2, dim), &donor_face_closure[i]));
943     }
944     PetscInt fStart, fEnd, vStart, vEnd;
945     PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
946     PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
947     for (PetscInt f = fStart; f < fEnd; f++) {
948       PetscInt npoints, *points = NULL, num_fverts = 0, fverts[8];
949       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
950       PetscInt bc_count[6] = {0};
951       for (PetscInt i = 0; i < npoints; i++) {
952         PetscInt p = points[2 * i];
953         if (!IsPointInsideStratum(p, vStart, vEnd)) continue;
954         fverts[num_fverts++] = p;
955         Ijk loc              = ZCodeSplit(vert_z[p - vStart]);
956         // Convention here matches DMPlexCreateCubeMesh_Internal
957         bc_count[0] += loc.i == 0;
958         bc_count[1] += loc.i == layout.vextent.i - 1;
959         bc_count[2] += loc.j == 0;
960         bc_count[3] += loc.j == layout.vextent.j - 1;
961         bc_count[4] += loc.k == 0;
962         bc_count[5] += loc.k == layout.vextent.k - 1;
963       }
964       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
965       for (PetscInt bc = 0, bc_match = 0; bc < 2 * dim; bc++) {
966         if (bc_count[bc] == PetscPowInt(2, dim - 1)) {
967           PetscCall(DMPlexOrientPoint(dm, f, face_orient_dim[dim][bc]));
968           if (periodicity[bc / 2] == DM_BOUNDARY_PERIODIC) {
969             PetscInt *put;
970             if (bc % 2 == 0) { // donor face; no label
971               PetscCall(PetscSegBufferGet(my_donor_faces[bc / 2], 1, &put));
972               *put = f;
973             } else { // periodic face
974               PetscCall(PetscSegBufferGet(per_faces[bc / 2], 1, &put));
975               *put = f;
976               ZCode *zput;
977               PetscCall(PetscSegBufferGet(donor_face_closure[bc / 2], num_fverts, &zput));
978               for (PetscInt i = 0; i < num_fverts; i++) {
979                 Ijk loc = ZCodeSplit(vert_z[fverts[i] - vStart]);
980                 switch (bc / 2) {
981                 case 0:
982                   loc.i = 0;
983                   break;
984                 case 1:
985                   loc.j = 0;
986                   break;
987                 case 2:
988                   loc.k = 0;
989                   break;
990                 }
991                 *zput++ = ZEncode(loc);
992               }
993             }
994             continue;
995           }
996           PetscAssert(bc_match == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Face matches multiple face sets");
997           PetscCall(DMLabelSetValue(label, f, face_marker_dim[dim][bc]));
998           bc_match++;
999         }
1000       }
1001     }
1002     // Ensure that the Coordinate DM has our new boundary labels
1003     DM cdm;
1004     PetscCall(DMGetCoordinateDM(dm, &cdm));
1005     PetscCall(DMCopyLabels(dm, cdm, PETSC_COPY_VALUES, PETSC_FALSE, DM_COPY_LABELS_FAIL));
1006     if (periodicity[0] == DM_BOUNDARY_PERIODIC || (dim > 1 && periodicity[1] == DM_BOUNDARY_PERIODIC) || (dim > 2 && periodicity[2] == DM_BOUNDARY_PERIODIC)) {
1007       PetscCall(DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(dm, &layout, vert_z, per_faces, lower, upper, periodicity, donor_face_closure, my_donor_faces));
1008     }
1009     for (PetscInt i = 0; i < 3; i++) {
1010       PetscCall(PetscSegBufferDestroy(&per_faces[i]));
1011       PetscCall(PetscSegBufferDestroy(&donor_face_closure[i]));
1012       PetscCall(PetscSegBufferDestroy(&my_donor_faces[i]));
1013     }
1014   }
1015   PetscCall(PetscFree(layout.zstarts));
1016   PetscCall(PetscFree(vert_z));
1017   PetscCall(PetscLogEventEnd(DMPLEX_CreateBoxSFC, dm, 0, 0, 0));
1018   PetscFunctionReturn(PETSC_SUCCESS);
1019 }
1020 
1021 /*@
1022   DMPlexSetIsoperiodicFaceSF - Express periodicity from an existing mesh
1023 
1024   Logically Collective
1025 
1026   Input Parameters:
1027 + dm           - The `DMPLEX` on which to set periodicity
1028 . num_face_sfs - Number of `PetscSF`s in `face_sfs`
1029 - face_sfs     - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.
1030 
1031   Level: advanced
1032 
1033   Note:
1034   One can use `-dm_plex_shape zbox` to use this mode of periodicity, wherein the periodic points are distinct both globally
1035   and locally, but are paired when creating a global dof space.
1036 
1037 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexGetIsoperiodicFaceSF()`
1038 @*/
1039 PetscErrorCode DMPlexSetIsoperiodicFaceSF(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs)
1040 {
1041   DM_Plex *plex = (DM_Plex *)dm->data;
1042 
1043   PetscFunctionBegin;
1044   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1045   if (num_face_sfs) PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex));
1046   else PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", NULL));
1047   if (num_face_sfs == plex->periodic.num_face_sfs && (num_face_sfs == 0 || face_sfs == plex->periodic.face_sfs)) PetscFunctionReturn(PETSC_SUCCESS);
1048   PetscCall(DMSetGlobalSection(dm, NULL));
1049 
1050   for (PetscInt i = 0; i < num_face_sfs; i++) PetscCall(PetscObjectReference((PetscObject)face_sfs[i]));
1051 
1052   if (plex->periodic.num_face_sfs > 0) {
1053     for (PetscInt i = 0; i < plex->periodic.num_face_sfs; i++) PetscCall(PetscSFDestroy(&plex->periodic.face_sfs[i]));
1054     PetscCall(PetscFree(plex->periodic.face_sfs));
1055   }
1056 
1057   plex->periodic.num_face_sfs = num_face_sfs;
1058   PetscCall(PetscCalloc1(num_face_sfs, &plex->periodic.face_sfs));
1059   for (PetscInt i = 0; i < num_face_sfs; i++) plex->periodic.face_sfs[i] = face_sfs[i];
1060 
1061   DM cdm = dm->coordinates[0].dm; // Can't DMGetCoordinateDM because it automatically creates one
1062   if (cdm) {
1063     PetscCall(DMPlexSetIsoperiodicFaceSF(cdm, num_face_sfs, face_sfs));
1064     if (face_sfs) cdm->periodic.setup = DMPeriodicCoordinateSetUp_Internal;
1065   }
1066   PetscFunctionReturn(PETSC_SUCCESS);
1067 }
1068 
1069 /*@C
1070   DMPlexGetIsoperiodicFaceSF - Obtain periodicity for a mesh
1071 
1072   Logically Collective
1073 
1074   Input Parameter:
1075 . dm - The `DMPLEX` for which to obtain periodic relation
1076 
1077   Output Parameters:
1078 + num_face_sfs - Number of `PetscSF`s in the array
1079 - face_sfs     - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.
1080 
1081   Level: advanced
1082 
1083 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()`
1084 @*/
1085 PetscErrorCode DMPlexGetIsoperiodicFaceSF(DM dm, PetscInt *num_face_sfs, const PetscSF **face_sfs)
1086 {
1087   DM_Plex *plex = (DM_Plex *)dm->data;
1088 
1089   PetscFunctionBegin;
1090   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1091   *face_sfs     = plex->periodic.face_sfs;
1092   *num_face_sfs = plex->periodic.num_face_sfs;
1093   PetscFunctionReturn(PETSC_SUCCESS);
1094 }
1095 
1096 /*@C
1097   DMPlexSetIsoperiodicFaceTransform - set geometric transform from donor to periodic points
1098 
1099   Logically Collective
1100 
1101   Input Parameters:
1102 + dm - `DMPLEX` that has been configured with `DMPlexSetIsoperiodicFaceSF()`
1103 . n  - Number of transforms in array
1104 - t  - Array of 4x4 affine transformation basis.
1105 
1106   Level: advanced
1107 
1108   Notes:
1109   Affine transforms are 4x4 matrices in which the leading 3x3 block expresses a rotation (or identity for no rotation),
1110   the last column contains a translation vector, and the bottom row is all zero except the last entry, which must always
1111   be 1. This representation is common in geometric modeling and allows affine transformations to be composed using
1112   simple matrix multiplication.
1113 
1114   Although the interface accepts a general affine transform, only affine translation is supported at present.
1115 
1116   Developer Notes:
1117   This interface should be replaced by making BasisTransform public, expanding it to support affine representations, and
1118   adding GPU implementations to apply the G2L/L2G transforms.
1119 
1120 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()`
1121 @*/
1122 PetscErrorCode DMPlexSetIsoperiodicFaceTransform(DM dm, PetscInt n, const PetscScalar t[])
1123 {
1124   DM_Plex *plex = (DM_Plex *)dm->data;
1125 
1126   PetscFunctionBegin;
1127   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1128   PetscCheck(n == plex->periodic.num_face_sfs, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Number of transforms (%" PetscInt_FMT ") must equal number of isoperiodc face SFs (%" PetscInt_FMT ")", n, plex->periodic.num_face_sfs);
1129 
1130   PetscCall(PetscFree(plex->periodic.transform));
1131   PetscCall(PetscMalloc1(n, &plex->periodic.transform));
1132   for (PetscInt i = 0; i < n; i++) {
1133     for (PetscInt j = 0; j < 4; j++) {
1134       for (PetscInt k = 0; k < 4; k++) {
1135         PetscCheck(j != k || t[i * 16 + j * 4 + k] == 1., PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Rotated transforms not supported");
1136         plex->periodic.transform[i][j][k] = t[i * 16 + j * 4 + k];
1137       }
1138     }
1139   }
1140   PetscFunctionReturn(PETSC_SUCCESS);
1141 }
1142