xref: /petsc/src/dm/impls/plex/plexsfc.c (revision be37439ebbbdb2f81c3420c175a94aa72e59929c)
1 #include <petsc/private/dmpleximpl.h> /*I   "petscdmplex.h"   I*/
2 #include <petscsf.h>
3 #include <petsc/private/hashset.h>
4 
5 typedef uint64_t ZCode;
6 
7 PETSC_HASH_SET(ZSet, ZCode, PetscHash_UInt64, PetscHashEqual)
8 
9 typedef struct {
10   PetscInt i, j, k;
11 } Ijk;
12 
13 typedef struct {
14   Ijk         eextent;
15   Ijk         vextent;
16   PetscMPIInt comm_size;
17   ZCode      *zstarts;
18 } ZLayout;
19 
20 // ***** Overview of ZCode *******
21 // The SFC uses integer indexing for each dimension and encodes them into a single integer by interleaving the bits of each index.
22 // This is known as Morton encoding, and is refered to as ZCode in this code.
23 // So for index i having bits [i2,i1,i0], and similar for indexes j and k, the ZCode (Morton number) would be:
24 //    [k2,j2,i2,k1,j1,i1,k0,j0,i0]
25 // This encoding allows for easier traversal of the SFC structure (see https://en.wikipedia.org/wiki/Z-order_curve and `ZStepOct()`).
26 // `ZEncode()` is used to go from indices to ZCode, while `ZCodeSplit()` goes from ZCode back to indices.
27 
28 // Decodes the leading interleaved index from a ZCode
29 // e.g. [k2,j2,i2,k1,j1,i1,k0,j0,i0] -> [i2,i1,i0]
30 // Magic numbers taken from https://stackoverflow.com/a/18528775/7564988 (translated to octal)
31 static unsigned ZCodeSplit1(ZCode z)
32 {
33   z &= 0111111111111111111111;
34   z = (z | z >> 2) & 0103030303030303030303;
35   z = (z | z >> 4) & 0100170017001700170017;
36   z = (z | z >> 8) & 0370000037700000377;
37   z = (z | z >> 16) & 0370000000000177777;
38   z = (z | z >> 32) & 07777777;
39   return (unsigned)z;
40 }
41 
42 // Encodes the leading interleaved index from a ZCode
43 // e.g. [i2,i1,i0] -> [0,0,i2,0,0,i1,0,0,i0]
44 static ZCode ZEncode1(unsigned t)
45 {
46   ZCode z = t;
47   z &= 07777777;
48   z = (z | z << 32) & 0370000000000177777;
49   z = (z | z << 16) & 0370000037700000377;
50   z = (z | z << 8) & 0100170017001700170017;
51   z = (z | z << 4) & 0103030303030303030303;
52   z = (z | z << 2) & 0111111111111111111111;
53   return z;
54 }
55 
56 // Decodes i j k indices from a ZCode.
57 // Uses `ZCodeSplit1()` by shifting ZCode so that the leading index is the desired one to decode
58 static Ijk ZCodeSplit(ZCode z)
59 {
60   Ijk c;
61   c.i = ZCodeSplit1(z >> 2);
62   c.j = ZCodeSplit1(z >> 1);
63   c.k = ZCodeSplit1(z >> 0);
64   return c;
65 }
66 
67 // Encodes i j k indices to a ZCode.
68 // Uses `ZCodeEncode1()` by shifting resulting ZCode to the appropriate bit placement
69 static ZCode ZEncode(Ijk c)
70 {
71   ZCode z = (ZEncode1((unsigned int)c.i) << 2) | (ZEncode1((unsigned int)c.j) << 1) | ZEncode1((unsigned int)c.k);
72   return z;
73 }
74 
75 static PetscBool IjkActive(Ijk extent, Ijk l)
76 {
77   if (l.i < extent.i && l.j < extent.j && l.k < extent.k) return PETSC_TRUE;
78   return PETSC_FALSE;
79 }
80 
81 // If z is not the base of an octet (last 3 bits 0), return 0.
82 //
83 // If z is the base of an octet, we recursively grow to the biggest structured octet. This is typically useful when a z
84 // is outside the domain and we wish to skip a (possibly recursively large) octet to find our next interesting point.
85 static ZCode ZStepOct(ZCode z)
86 {
87   if (PetscUnlikely(z == 0)) return 0; // Infinite loop below if z == 0
88   ZCode step = 07;
89   for (; (z & step) == 0; step = (step << 3) | 07) { }
90   return step >> 3;
91 }
92 
93 // Since element/vertex box extents are typically not equal powers of 2, Z codes that lie within the domain are not contiguous.
94 static PetscErrorCode ZLayoutCreate(PetscMPIInt size, const PetscInt eextent[3], const PetscInt vextent[3], ZLayout *layout)
95 {
96   PetscFunctionBegin;
97   layout->eextent.i = eextent[0];
98   layout->eextent.j = eextent[1];
99   layout->eextent.k = eextent[2];
100   layout->vextent.i = vextent[0];
101   layout->vextent.j = vextent[1];
102   layout->vextent.k = vextent[2];
103   layout->comm_size = size;
104   layout->zstarts   = NULL;
105   PetscCall(PetscMalloc1(size + 1, &layout->zstarts));
106 
107   PetscInt total_elems = eextent[0] * eextent[1] * eextent[2];
108   ZCode    z           = 0;
109   layout->zstarts[0]   = 0;
110   // This loop traverses all vertices in the global domain, so is worth making fast. We use ZStepBound
111   for (PetscMPIInt r = 0; r < size; r++) {
112     PetscInt elems_needed = (total_elems / size) + (total_elems % size > r), count;
113     for (count = 0; count < elems_needed; z++) {
114       ZCode skip = ZStepOct(z); // optimistically attempt a longer step
115       for (ZCode s = skip;; s >>= 3) {
116         Ijk trial = ZCodeSplit(z + s);
117         if (IjkActive(layout->eextent, trial)) {
118           while (count + s + 1 > (ZCode)elems_needed) s >>= 3; // Shrink the octet
119           count += s + 1;
120           z += s;
121           break;
122         }
123         if (s == 0) { // the whole skip octet is inactive
124           z += skip;
125           break;
126         }
127       }
128     }
129     // Pick up any extra vertices in the Z ordering before the next rank's first owned element.
130     //
131     // This leads to poorly balanced vertices when eextent is a power of 2, since all the fringe vertices end up
132     // on the last rank. A possible solution is to balance the Z-order vertices independently from the cells, which will
133     // result in a lot of element closures being remote. We could finish marking boundary conditions, then do a round of
134     // vertex ownership smoothing (which would reorder and redistribute vertices without touching element distribution).
135     // Another would be to have an analytic ownership criteria for vertices in the fringe veextent - eextent. This would
136     // complicate the job of identifying an owner and its offset.
137     //
138     // The current recommended approach is to let `-dm_distribute 1` (default) resolve vertex ownership. This is
139     // *mandatory* with isoperiodicity (except in special cases) to remove standed vertices from local spaces. Here's
140     // the issue:
141     //
142     // Consider this partition on rank 0 (left) and rank 1.
143     //
144     //    4 --------  5 -- 14 --10 -- 21 --11
145     //                |          |          |
146     // 7 -- 16 --  8  |          |          |
147     // |           |  3 -------  7 -------  9
148     // |           |             |          |
149     // 4 --------  6 ------ 10   |          |
150     // |           |         |   6 -- 16 -- 8
151     // |           |         |
152     // 3 ---11---  5 --18--  9
153     //
154     // The periodic face SF looks like
155     // [0] Number of roots=21, leaves=1, remote ranks=1
156     // [0] 16 <- (0,11)
157     // [1] Number of roots=22, leaves=2, remote ranks=2
158     // [1] 14 <- (0,18)
159     // [1] 21 <- (1,16)
160     //
161     // In handling face (0,16), rank 0 learns that (0,7) and (0,8) map to (0,3) and (0,5) respectively, thus we won't use
162     // the point SF links to (1,4) and (1,5). Rank 1 learns about the periodic mapping of (1,5) while handling face
163     // (1,14), but never learns that vertex (1,4) has been mapped to (0,3) by face (0,16).
164     //
165     // We can relatively easily inform vertex (1,4) of this mapping, but it stays in rank 1's local space despite not
166     // being in the closure and thus not being contributed to. This would be mostly harmless except that some viewer
167     // routines expect all local points to be somehow significant. It is not easy to analytically remove the (1,4)
168     // vertex because the point SF and isoperiodic face SF would need to be updated to account for removal of the
169     // stranded vertices.
170     for (; z <= ZEncode(layout->vextent); z++) {
171       Ijk loc = ZCodeSplit(z);
172       if (IjkActive(layout->eextent, loc)) break;
173       z += ZStepOct(z);
174     }
175     layout->zstarts[r + 1] = z;
176   }
177   layout->zstarts[size] = ZEncode(layout->vextent);
178   PetscFunctionReturn(PETSC_SUCCESS);
179 }
180 
181 static PetscInt ZLayoutElementsOnRank(const ZLayout *layout, PetscMPIInt rank)
182 {
183   PetscInt remote_elem = 0;
184   for (ZCode rz = layout->zstarts[rank]; rz < layout->zstarts[rank + 1]; rz++) {
185     Ijk loc = ZCodeSplit(rz);
186     if (IjkActive(layout->eextent, loc)) remote_elem++;
187     else rz += ZStepOct(rz);
188   }
189   return remote_elem;
190 }
191 
192 static PetscInt ZCodeFind(ZCode key, PetscInt n, const ZCode X[])
193 {
194   PetscInt lo = 0, hi = n;
195 
196   if (n == 0) return -1;
197   while (hi - lo > 1) {
198     PetscInt mid = lo + (hi - lo) / 2;
199     if (key < X[mid]) hi = mid;
200     else lo = mid;
201   }
202   return key == X[lo] ? lo : -(lo + (key > X[lo]) + 1);
203 }
204 
205 static PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(DM dm, const ZLayout *layout, const ZCode *vert_z, PetscSegBuffer per_faces[3], const PetscReal *lower, const PetscReal *upper, const DMBoundaryType *periodicity, PetscSegBuffer donor_face_closure[3], PetscSegBuffer my_donor_faces[3])
206 {
207   MPI_Comm    comm;
208   PetscInt    dim, vStart, vEnd;
209   PetscMPIInt size;
210   PetscSF     face_sfs[3];
211   PetscScalar transforms[3][4][4] = {{{0}}};
212 
213   PetscFunctionBegin;
214   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
215   PetscCallMPI(MPI_Comm_size(comm, &size));
216   PetscCall(DMGetDimension(dm, &dim));
217   const PetscInt csize = PetscPowInt(2, dim - 1);
218   PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
219 
220   PetscInt num_directions = 0;
221   for (PetscInt direction = 0; direction < dim; direction++) {
222     PetscCount   num_faces;
223     PetscInt    *faces;
224     ZCode       *donor_verts, *donor_minz;
225     PetscSFNode *leaf;
226     PetscCount   num_multiroots = 0;
227     PetscInt     pStart, pEnd;
228     PetscBool    sorted;
229     PetscInt     inum_faces;
230 
231     if (periodicity[direction] != DM_BOUNDARY_PERIODIC) continue;
232     PetscCall(PetscSegBufferGetSize(per_faces[direction], &num_faces));
233     PetscCall(PetscSegBufferExtractInPlace(per_faces[direction], &faces));
234     PetscCall(PetscSegBufferExtractInPlace(donor_face_closure[direction], &donor_verts));
235     PetscCall(PetscMalloc1(num_faces, &donor_minz));
236     PetscCall(PetscMalloc1(num_faces, &leaf));
237     for (PetscCount i = 0; i < num_faces; i++) {
238       ZCode minz = donor_verts[i * csize];
239 
240       for (PetscInt j = 1; j < csize; j++) minz = PetscMin(minz, donor_verts[i * csize + j]);
241       donor_minz[i] = minz;
242     }
243     PetscCall(PetscIntCast(num_faces, &inum_faces));
244     PetscCall(PetscSortedInt64(inum_faces, (const PetscInt64 *)donor_minz, &sorted));
245     // If a donor vertex were chosen to broker multiple faces, we would have a logic error.
246     // Checking for sorting is a cheap check that there are no duplicates.
247     PetscCheck(sorted, PETSC_COMM_SELF, PETSC_ERR_PLIB, "minz not sorted; possible duplicates not checked");
248     for (PetscCount i = 0; i < num_faces;) {
249       ZCode       z = donor_minz[i];
250       PetscMPIInt remote_rank, remote_count = 0;
251 
252       PetscCall(PetscMPIIntCast(ZCodeFind(z, size + 1, layout->zstarts), &remote_rank));
253       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
254       // Process all the vertices on this rank
255       for (ZCode rz = layout->zstarts[remote_rank]; rz < layout->zstarts[remote_rank + 1]; rz++) {
256         Ijk loc = ZCodeSplit(rz);
257 
258         if (rz == z) {
259           leaf[i].rank  = remote_rank;
260           leaf[i].index = remote_count;
261           i++;
262           if (i == num_faces) break;
263           z = donor_minz[i];
264         }
265         if (IjkActive(layout->vextent, loc)) remote_count++;
266       }
267     }
268     PetscCall(PetscFree(donor_minz));
269     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &face_sfs[num_directions]));
270     PetscCall(PetscSFSetGraph(face_sfs[num_directions], vEnd - vStart, inum_faces, NULL, PETSC_USE_POINTER, leaf, PETSC_USE_POINTER));
271     const PetscInt *my_donor_degree;
272     PetscCall(PetscSFComputeDegreeBegin(face_sfs[num_directions], &my_donor_degree));
273     PetscCall(PetscSFComputeDegreeEnd(face_sfs[num_directions], &my_donor_degree));
274 
275     for (PetscInt i = 0; i < vEnd - vStart; i++) {
276       num_multiroots += my_donor_degree[i];
277       if (my_donor_degree[i] == 0) continue;
278       PetscAssert(my_donor_degree[i] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex has multiple faces");
279     }
280     PetscInt  *my_donors, *donor_indices, *my_donor_indices;
281     PetscCount num_my_donors;
282 
283     PetscCall(PetscSegBufferGetSize(my_donor_faces[direction], &num_my_donors));
284     PetscCheck(num_my_donors == num_multiroots, PETSC_COMM_SELF, PETSC_ERR_SUP, "Donor request (%" PetscCount_FMT ") does not match expected donors (%" PetscCount_FMT ")", num_my_donors, num_multiroots);
285     PetscCall(PetscSegBufferExtractInPlace(my_donor_faces[direction], &my_donors));
286     PetscCall(PetscMalloc1(vEnd - vStart, &my_donor_indices));
287     for (PetscCount i = 0; i < num_my_donors; i++) {
288       PetscInt f = my_donors[i];
289       PetscInt num_points, *points = NULL, minv = PETSC_INT_MAX;
290 
291       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
292       for (PetscInt j = 0; j < num_points; j++) {
293         PetscInt p = points[2 * j];
294         if (p < vStart || vEnd <= p) continue;
295         minv = PetscMin(minv, p);
296       }
297       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
298       PetscAssert(my_donor_degree[minv - vStart] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex not requested");
299       my_donor_indices[minv - vStart] = f;
300     }
301     PetscCall(PetscMalloc1(num_faces, &donor_indices));
302     PetscCall(PetscSFBcastBegin(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
303     PetscCall(PetscSFBcastEnd(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
304     PetscCall(PetscFree(my_donor_indices));
305     // Modify our leafs so they point to donor faces instead of donor minz. Additionally, give them indices as faces.
306     for (PetscCount i = 0; i < num_faces; i++) leaf[i].index = donor_indices[i];
307     PetscCall(PetscFree(donor_indices));
308     PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
309     PetscCall(PetscSFSetGraph(face_sfs[num_directions], pEnd - pStart, inum_faces, faces, PETSC_COPY_VALUES, leaf, PETSC_OWN_POINTER));
310     {
311       char face_sf_name[PETSC_MAX_PATH_LEN];
312       PetscCall(PetscSNPrintf(face_sf_name, sizeof face_sf_name, "Z-order Isoperiodic Faces #%" PetscInt_FMT, num_directions));
313       PetscCall(PetscObjectSetName((PetscObject)face_sfs[num_directions], face_sf_name));
314     }
315 
316     transforms[num_directions][0][0]         = 1;
317     transforms[num_directions][1][1]         = 1;
318     transforms[num_directions][2][2]         = 1;
319     transforms[num_directions][3][3]         = 1;
320     transforms[num_directions][direction][3] = upper[direction] - lower[direction];
321     num_directions++;
322   }
323 
324   PetscCall(DMPlexSetIsoperiodicFaceSF(dm, num_directions, face_sfs));
325   PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, num_directions, (PetscScalar *)transforms));
326 
327   for (PetscInt i = 0; i < num_directions; i++) PetscCall(PetscSFDestroy(&face_sfs[i]));
328   PetscFunctionReturn(PETSC_SUCCESS);
329 }
330 
331 // This is a DMGlobalToLocalHook that applies the affine offsets. When extended for rotated periodicity, it'll need to
332 // apply a rotatonal transform and similar operations will be needed for fields (e.g., to rotate a velocity vector).
333 // We use this crude approach here so we don't have to write new GPU kernels yet.
334 static PetscErrorCode DMCoordAddPeriodicOffsets_Private(DM dm, Vec g, InsertMode mode, Vec l, void *ctx)
335 {
336   PetscFunctionBegin;
337   // These `VecScatter`s should be merged to improve efficiency; the scatters cannot be overlapped.
338   for (PetscInt i = 0; i < dm->periodic.num_affines; i++) {
339     PetscCall(VecScatterBegin(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD));
340     PetscCall(VecScatterEnd(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD));
341   }
342   PetscFunctionReturn(PETSC_SUCCESS);
343 }
344 
345 // Start with an SF for a positive depth (e.g., faces) and create a new SF for matched closure. The caller must ensure
346 // that both the donor (root) face and the periodic (leaf) face have consistent orientation, meaning that their closures
347 // are isomorphic. It may be useful/necessary for this restriction to be loosened.
348 //
349 // Output Arguments:
350 //
351 // + closure_sf - augmented point SF (see `DMGetPointSF()`) that includes the faces and all points in its closure. This
352 //   can be used to create a global section and section SF.
353 // - is_points - array of index sets for just the points in the closure of `face_sf`. These may be used to apply an affine
354 //   transformation to periodic dofs; see DMPeriodicCoordinateSetUp_Internal().
355 //
356 static PetscErrorCode DMPlexCreateIsoperiodicPointSF_Private(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs, PetscSF *closure_sf, IS **is_points)
357 {
358   MPI_Comm           comm;
359   PetscMPIInt        rank;
360   PetscSF            point_sf;
361   PetscInt           nroots, nleaves;
362   const PetscInt    *filocal;
363   const PetscSFNode *firemote;
364 
365   PetscFunctionBegin;
366   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
367   PetscCallMPI(MPI_Comm_rank(comm, &rank));
368   PetscCall(DMGetPointSF(dm, &point_sf)); // Point SF has remote points
369   PetscCall(PetscMalloc1(num_face_sfs, is_points));
370 
371   for (PetscInt f = 0; f < num_face_sfs; f++) {
372     PetscSF   face_sf = face_sfs[f];
373     PetscInt *rootdata, *leafdata;
374 
375     PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, &firemote));
376     PetscCall(PetscCalloc2(2 * nroots, &rootdata, 2 * nroots, &leafdata));
377     for (PetscInt i = 0; i < nleaves; i++) {
378       PetscInt point = filocal[i], cl_size, *closure = NULL;
379       PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
380       leafdata[point] = cl_size - 1;
381       PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
382     }
383     PetscCall(PetscSFReduceBegin(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
384     PetscCall(PetscSFReduceEnd(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
385 
386     PetscInt root_offset = 0;
387     for (PetscInt p = 0; p < nroots; p++) {
388       const PetscInt *donor_dof = rootdata + nroots;
389       if (donor_dof[p] == 0) {
390         rootdata[2 * p]     = -1;
391         rootdata[2 * p + 1] = -1;
392         continue;
393       }
394       PetscInt  cl_size;
395       PetscInt *closure = NULL;
396       PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
397       // cl_size - 1 = points not including self
398       PetscAssert(donor_dof[p] == cl_size - 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Reduced leaf cone sizes do not match root cone sizes");
399       rootdata[2 * p]     = root_offset;
400       rootdata[2 * p + 1] = cl_size - 1;
401       root_offset += cl_size - 1;
402       PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
403     }
404     PetscCall(PetscSFBcastBegin(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
405     PetscCall(PetscSFBcastEnd(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
406     // Count how many leaves we need to communicate the closures
407     PetscInt leaf_offset = 0;
408     for (PetscInt i = 0; i < nleaves; i++) {
409       PetscInt point = filocal[i];
410       if (leafdata[2 * point + 1] < 0) continue;
411       leaf_offset += leafdata[2 * point + 1];
412     }
413 
414     PetscSFNode *closure_leaf;
415     PetscCall(PetscMalloc1(leaf_offset, &closure_leaf));
416     leaf_offset = 0;
417     for (PetscInt i = 0; i < nleaves; i++) {
418       PetscInt point   = filocal[i];
419       PetscInt cl_size = leafdata[2 * point + 1];
420       if (cl_size < 0) continue;
421       for (PetscInt j = 0; j < cl_size; j++) {
422         closure_leaf[leaf_offset].rank  = firemote[i].rank;
423         closure_leaf[leaf_offset].index = leafdata[2 * point] + j;
424         leaf_offset++;
425       }
426     }
427 
428     PetscSF sf_closure;
429     PetscCall(PetscSFCreate(comm, &sf_closure));
430     PetscCall(PetscSFSetGraph(sf_closure, root_offset, leaf_offset, NULL, PETSC_USE_POINTER, closure_leaf, PETSC_OWN_POINTER));
431 
432     PetscSFNode *leaf_donor_closure;
433     { // Pack root buffer with owner for every point in the root cones
434       PetscSFNode       *donor_closure;
435       const PetscInt    *pilocal;
436       const PetscSFNode *piremote;
437       PetscInt           npoints;
438 
439       PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, &pilocal, &piremote));
440       PetscCall(PetscCalloc1(root_offset, &donor_closure));
441       root_offset = 0;
442       for (PetscInt p = 0; p < nroots; p++) {
443         if (rootdata[2 * p] < 0) continue;
444         PetscInt  cl_size;
445         PetscInt *closure = NULL;
446         PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
447         for (PetscInt j = 1; j < cl_size; j++) {
448           PetscInt c = closure[2 * j];
449           if (pilocal) {
450             PetscInt found = -1;
451             if (npoints > 0) PetscCall(PetscFindInt(c, npoints, pilocal, &found));
452             if (found >= 0) {
453               donor_closure[root_offset++] = piremote[found];
454               continue;
455             }
456           }
457           // we own c
458           donor_closure[root_offset].rank  = rank;
459           donor_closure[root_offset].index = c;
460           root_offset++;
461         }
462         PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
463       }
464 
465       PetscCall(PetscMalloc1(leaf_offset, &leaf_donor_closure));
466       PetscCall(PetscSFBcastBegin(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE));
467       PetscCall(PetscSFBcastEnd(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE));
468       PetscCall(PetscSFDestroy(&sf_closure));
469       PetscCall(PetscFree(donor_closure));
470     }
471 
472     PetscSFNode *new_iremote;
473     PetscCall(PetscCalloc1(nroots, &new_iremote));
474     for (PetscInt i = 0; i < nroots; i++) new_iremote[i].rank = -1;
475     // Walk leaves and match vertices
476     leaf_offset = 0;
477     for (PetscInt i = 0; i < nleaves; i++) {
478       PetscInt  point   = filocal[i], cl_size;
479       PetscInt *closure = NULL;
480       PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
481       for (PetscInt j = 1; j < cl_size; j++) { // TODO: should we send donor edge orientations so we can flip for consistency?
482         PetscInt    c  = closure[2 * j];
483         PetscSFNode lc = leaf_donor_closure[leaf_offset];
484         // printf("[%d] face %d.%d: %d ?-- (%d,%d)\n", rank, point, j, c, lc.rank, lc.index);
485         if (new_iremote[c].rank == -1) {
486           new_iremote[c] = lc;
487         } else PetscCheck(new_iremote[c].rank == lc.rank && new_iremote[c].index == lc.index, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Mismatched cone ordering between faces");
488         leaf_offset++;
489       }
490       PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
491     }
492     PetscCall(PetscFree(leaf_donor_closure));
493 
494     // Include face points in closure SF
495     for (PetscInt i = 0; i < nleaves; i++) new_iremote[filocal[i]] = firemote[i];
496     // consolidate leaves
497     PetscInt num_new_leaves = 0;
498     for (PetscInt i = 0; i < nroots; i++) {
499       if (new_iremote[i].rank == -1) continue;
500       new_iremote[num_new_leaves] = new_iremote[i];
501       leafdata[num_new_leaves]    = i;
502       num_new_leaves++;
503     }
504     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, num_new_leaves, leafdata, PETSC_COPY_VALUES, &(*is_points)[f]));
505 
506     PetscSF csf;
507     PetscCall(PetscSFCreate(comm, &csf));
508     PetscCall(PetscSFSetGraph(csf, nroots, num_new_leaves, leafdata, PETSC_COPY_VALUES, new_iremote, PETSC_COPY_VALUES));
509     PetscCall(PetscFree(new_iremote)); // copy and delete because new_iremote is longer than it needs to be
510     PetscCall(PetscFree2(rootdata, leafdata));
511 
512     PetscInt npoints;
513     PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, NULL, NULL));
514     if (npoints < 0) { // empty point_sf
515       *closure_sf = csf;
516     } else {
517       PetscCall(PetscSFMerge(point_sf, csf, closure_sf));
518       PetscCall(PetscSFDestroy(&csf));
519     }
520     if (f > 0) PetscCall(PetscSFDestroy(&point_sf)); // Only destroy if point_sf is from previous calls to PetscSFMerge
521     point_sf = *closure_sf;                          // Use combined point + isoperiodic SF to define point ownership for further face_sf
522   }
523   PetscCall(PetscObjectSetName((PetscObject)*closure_sf, "Composed Periodic Points"));
524   PetscFunctionReturn(PETSC_SUCCESS);
525 }
526 
527 static PetscErrorCode DMGetIsoperiodicPointSF_Plex(DM dm, PetscSF *sf)
528 {
529   DM_Plex *plex = (DM_Plex *)dm->data;
530 
531   PetscFunctionBegin;
532   if (!plex->periodic.composed_sf) PetscCall(DMPlexCreateIsoperiodicPointSF_Private(dm, plex->periodic.num_face_sfs, plex->periodic.face_sfs, &plex->periodic.composed_sf, &plex->periodic.periodic_points));
533   if (sf) *sf = plex->periodic.composed_sf;
534   PetscFunctionReturn(PETSC_SUCCESS);
535 }
536 
537 PetscErrorCode DMPlexMigrateIsoperiodicFaceSF_Internal(DM old_dm, DM dm, PetscSF sf_migration)
538 {
539   DM_Plex    *plex = (DM_Plex *)old_dm->data;
540   PetscSF     sf_point, *new_face_sfs;
541   PetscMPIInt rank;
542 
543   PetscFunctionBegin;
544   if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
545   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
546   PetscCall(DMGetPointSF(dm, &sf_point));
547   PetscCall(PetscMalloc1(plex->periodic.num_face_sfs, &new_face_sfs));
548 
549   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) {
550     PetscInt           old_npoints, new_npoints, old_nleaf, new_nleaf, point_nleaf;
551     PetscSFNode       *new_leafdata, *rootdata, *leafdata;
552     const PetscInt    *old_local, *point_local;
553     const PetscSFNode *old_remote, *point_remote;
554 
555     PetscCall(PetscSFGetGraph(plex->periodic.face_sfs[f], &old_npoints, &old_nleaf, &old_local, &old_remote));
556     PetscCall(PetscSFGetGraph(sf_migration, NULL, &new_nleaf, NULL, NULL));
557     PetscCall(PetscSFGetGraph(sf_point, &new_npoints, &point_nleaf, &point_local, &point_remote));
558     PetscAssert(new_nleaf == new_npoints, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected migration leaf space to match new point root space");
559     PetscCall(PetscMalloc3(old_npoints, &rootdata, old_npoints, &leafdata, new_npoints, &new_leafdata));
560 
561     // Fill new_leafdata with new owners of all points
562     for (PetscInt i = 0; i < new_npoints; i++) {
563       new_leafdata[i].rank  = rank;
564       new_leafdata[i].index = i;
565     }
566     for (PetscInt i = 0; i < point_nleaf; i++) {
567       PetscInt j      = point_local[i];
568       new_leafdata[j] = point_remote[i];
569     }
570     // REPLACE is okay because every leaf agrees about the new owners
571     PetscCall(PetscSFReduceBegin(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE));
572     PetscCall(PetscSFReduceEnd(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE));
573     // rootdata now contains the new owners
574 
575     // Send to leaves of old space
576     for (PetscInt i = 0; i < old_npoints; i++) {
577       leafdata[i].rank  = -1;
578       leafdata[i].index = -1;
579     }
580     PetscCall(PetscSFBcastBegin(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE));
581     PetscCall(PetscSFBcastEnd(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE));
582 
583     // Send to new leaf space
584     PetscCall(PetscSFBcastBegin(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE));
585     PetscCall(PetscSFBcastEnd(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE));
586 
587     PetscInt     nface = 0, *new_local;
588     PetscSFNode *new_remote;
589     for (PetscInt i = 0; i < new_npoints; i++) nface += (new_leafdata[i].rank >= 0);
590     PetscCall(PetscMalloc1(nface, &new_local));
591     PetscCall(PetscMalloc1(nface, &new_remote));
592     nface = 0;
593     for (PetscInt i = 0; i < new_npoints; i++) {
594       if (new_leafdata[i].rank == -1) continue;
595       new_local[nface]  = i;
596       new_remote[nface] = new_leafdata[i];
597       nface++;
598     }
599     PetscCall(PetscFree3(rootdata, leafdata, new_leafdata));
600     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &new_face_sfs[f]));
601     PetscCall(PetscSFSetGraph(new_face_sfs[f], new_npoints, nface, new_local, PETSC_OWN_POINTER, new_remote, PETSC_OWN_POINTER));
602     {
603       char new_face_sf_name[PETSC_MAX_PATH_LEN];
604       PetscCall(PetscSNPrintf(new_face_sf_name, sizeof new_face_sf_name, "Migrated Isoperiodic Faces #%" PetscInt_FMT, f));
605       PetscCall(PetscObjectSetName((PetscObject)new_face_sfs[f], new_face_sf_name));
606     }
607   }
608 
609   PetscCall(DMPlexSetIsoperiodicFaceSF(dm, plex->periodic.num_face_sfs, new_face_sfs));
610   PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, plex->periodic.num_face_sfs, (PetscScalar *)plex->periodic.transform));
611   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) PetscCall(PetscSFDestroy(&new_face_sfs[f]));
612   PetscCall(PetscFree(new_face_sfs));
613   PetscFunctionReturn(PETSC_SUCCESS);
614 }
615 
616 PetscErrorCode DMPeriodicCoordinateSetUp_Internal(DM dm)
617 {
618   DM_Plex   *plex = (DM_Plex *)dm->data;
619   PetscCount count;
620   IS         isdof;
621   PetscInt   dim;
622 
623   PetscFunctionBegin;
624   if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
625   PetscCall(DMGetIsoperiodicPointSF_Plex(dm, NULL));
626   PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex));
627 
628   PetscCall(DMGetDimension(dm, &dim));
629   dm->periodic.num_affines = plex->periodic.num_face_sfs;
630   PetscCall(PetscMalloc2(dm->periodic.num_affines, &dm->periodic.affine_to_local, dm->periodic.num_affines, &dm->periodic.affine));
631 
632   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) {
633     PetscInt        npoints, vsize, isize;
634     const PetscInt *points;
635     IS              is = plex->periodic.periodic_points[f];
636     PetscSegBuffer  seg;
637     PetscSection    section;
638     PetscInt       *ind;
639     Vec             L, P;
640     VecType         vec_type;
641     VecScatter      scatter;
642     PetscScalar    *x;
643 
644     PetscCall(DMGetLocalSection(dm, &section));
645     PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 32, &seg));
646     PetscCall(ISGetSize(is, &npoints));
647     PetscCall(ISGetIndices(is, &points));
648     for (PetscInt i = 0; i < npoints; i++) {
649       PetscInt point = points[i], off, dof;
650 
651       PetscCall(PetscSectionGetOffset(section, point, &off));
652       PetscCall(PetscSectionGetDof(section, point, &dof));
653       PetscAssert(dof % dim == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected dof %" PetscInt_FMT " not divisible by dimension %" PetscInt_FMT, dof, dim);
654       for (PetscInt j = 0; j < dof / dim; j++) {
655         PetscInt *slot;
656 
657         PetscCall(PetscSegBufferGetInts(seg, 1, &slot));
658         *slot = off / dim + j;
659       }
660     }
661     PetscCall(PetscSegBufferGetSize(seg, &count));
662     PetscCall(PetscSegBufferExtractAlloc(seg, &ind));
663     PetscCall(PetscSegBufferDestroy(&seg));
664     PetscCall(PetscIntCast(count, &isize));
665     PetscCall(ISCreateBlock(PETSC_COMM_SELF, dim, isize, ind, PETSC_OWN_POINTER, &isdof));
666 
667     PetscCall(PetscIntCast(count * dim, &vsize));
668     PetscCall(DMGetLocalVector(dm, &L));
669     PetscCall(VecCreate(PETSC_COMM_SELF, &P));
670     PetscCall(VecSetSizes(P, vsize, vsize));
671     PetscCall(VecGetType(L, &vec_type));
672     PetscCall(VecSetType(P, vec_type));
673     PetscCall(VecScatterCreate(P, NULL, L, isdof, &scatter));
674     PetscCall(DMRestoreLocalVector(dm, &L));
675     PetscCall(ISDestroy(&isdof));
676 
677     PetscCall(VecGetArrayWrite(P, &x));
678     for (PetscCount i = 0; i < count; i++) {
679       for (PetscInt j = 0; j < dim; j++) x[i * dim + j] = plex->periodic.transform[f][j][3];
680     }
681     PetscCall(VecRestoreArrayWrite(P, &x));
682 
683     dm->periodic.affine_to_local[f] = scatter;
684     dm->periodic.affine[f]          = P;
685   }
686   PetscCall(DMGlobalToLocalHookAdd(dm, NULL, DMCoordAddPeriodicOffsets_Private, NULL));
687   PetscFunctionReturn(PETSC_SUCCESS);
688 }
689 
690 // We'll just orient all the edges, though only periodic boundary edges need orientation
691 static PetscErrorCode DMPlexOrientPositiveEdges_Private(DM dm)
692 {
693   PetscInt dim, eStart, eEnd;
694 
695   PetscFunctionBegin;
696   PetscCall(DMGetDimension(dm, &dim));
697   if (dim < 3) PetscFunctionReturn(PETSC_SUCCESS); // not necessary
698   PetscCall(DMPlexGetDepthStratum(dm, 1, &eStart, &eEnd));
699   for (PetscInt e = eStart; e < eEnd; e++) {
700     const PetscInt *cone;
701     PetscCall(DMPlexGetCone(dm, e, &cone));
702     if (cone[0] > cone[1]) PetscCall(DMPlexOrientPoint(dm, e, -1));
703   }
704   PetscFunctionReturn(PETSC_SUCCESS);
705 }
706 
707 PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Internal(DM dm, PetscInt dim, const PetscInt faces[], const PetscReal lower[], const PetscReal upper[], const DMBoundaryType periodicity[], PetscBool interpolate)
708 {
709   PetscInt  eextent[3] = {1, 1, 1}, vextent[3] = {1, 1, 1};
710   const Ijk closure_1[] = {
711     {0, 0, 0},
712     {1, 0, 0},
713   };
714   const Ijk closure_2[] = {
715     {0, 0, 0},
716     {1, 0, 0},
717     {1, 1, 0},
718     {0, 1, 0},
719   };
720   const Ijk closure_3[] = {
721     {0, 0, 0},
722     {0, 1, 0},
723     {1, 1, 0},
724     {1, 0, 0},
725     {0, 0, 1},
726     {1, 0, 1},
727     {1, 1, 1},
728     {0, 1, 1},
729   };
730   const Ijk *const closure_dim[] = {NULL, closure_1, closure_2, closure_3};
731   // This must be kept consistent with DMPlexCreateCubeMesh_Internal
732   const PetscInt        face_marker_1[]   = {1, 2};
733   const PetscInt        face_marker_2[]   = {4, 2, 1, 3};
734   const PetscInt        face_marker_3[]   = {6, 5, 3, 4, 1, 2};
735   const PetscInt *const face_marker_dim[] = {NULL, face_marker_1, face_marker_2, face_marker_3};
736   // Orient faces so the normal is in the positive axis and the first vertex is the one closest to zero.
737   // These orientations can be determined by examining cones of a reference quad and hex element.
738   const PetscInt        face_orient_1[]   = {0, 0};
739   const PetscInt        face_orient_2[]   = {-1, 0, 0, -1};
740   const PetscInt        face_orient_3[]   = {-2, 0, -2, 1, -2, 0};
741   const PetscInt *const face_orient_dim[] = {NULL, face_orient_1, face_orient_2, face_orient_3};
742 
743   PetscFunctionBegin;
744   PetscCall(PetscLogEventBegin(DMPLEX_CreateBoxSFC, dm, 0, 0, 0));
745   PetscAssertPointer(dm, 1);
746   PetscValidLogicalCollectiveInt(dm, dim, 2);
747   PetscCall(DMSetDimension(dm, dim));
748   PetscMPIInt rank, size;
749   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size));
750   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
751   for (PetscInt i = 0; i < dim; i++) {
752     eextent[i] = faces[i];
753     vextent[i] = faces[i] + 1;
754   }
755   ZLayout layout;
756   PetscCall(ZLayoutCreate(size, eextent, vextent, &layout));
757   PetscZSet vset; // set of all vertices in the closure of the owned elements
758   PetscCall(PetscZSetCreate(&vset));
759   PetscInt local_elems = 0;
760   for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
761     Ijk loc = ZCodeSplit(z);
762     if (IjkActive(layout.vextent, loc)) PetscCall(PetscZSetAdd(vset, z));
763     else {
764       z += ZStepOct(z);
765       continue;
766     }
767     if (IjkActive(layout.eextent, loc)) {
768       local_elems++;
769       // Add all neighboring vertices to set
770       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
771         Ijk   inc  = closure_dim[dim][n];
772         Ijk   nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
773         ZCode v    = ZEncode(nloc);
774         PetscCall(PetscZSetAdd(vset, v));
775       }
776     }
777   }
778   PetscInt local_verts, off = 0;
779   ZCode   *vert_z;
780   PetscCall(PetscZSetGetSize(vset, &local_verts));
781   PetscCall(PetscMalloc1(local_verts, &vert_z));
782   PetscCall(PetscZSetGetElems(vset, &off, vert_z));
783   PetscCall(PetscZSetDestroy(&vset));
784   // ZCode is unsigned for bitwise convenience, but highest bit should never be set, so can interpret as signed
785   PetscCall(PetscSortInt64(local_verts, (PetscInt64 *)vert_z));
786 
787   PetscCall(DMPlexSetChart(dm, 0, local_elems + local_verts));
788   for (PetscInt e = 0; e < local_elems; e++) PetscCall(DMPlexSetConeSize(dm, e, PetscPowInt(2, dim)));
789   PetscCall(DMSetUp(dm));
790   {
791     PetscInt e = 0;
792     for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
793       Ijk loc = ZCodeSplit(z);
794       if (!IjkActive(layout.eextent, loc)) {
795         z += ZStepOct(z);
796         continue;
797       }
798       PetscInt cone[8], orient[8] = {0};
799       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
800         Ijk      inc  = closure_dim[dim][n];
801         Ijk      nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
802         ZCode    v    = ZEncode(nloc);
803         PetscInt ci   = ZCodeFind(v, local_verts, vert_z);
804         PetscAssert(ci >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Did not find neighbor vertex in set");
805         cone[n] = local_elems + ci;
806       }
807       PetscCall(DMPlexSetCone(dm, e, cone));
808       PetscCall(DMPlexSetConeOrientation(dm, e, orient));
809       e++;
810     }
811   }
812 
813   PetscCall(DMPlexSymmetrize(dm));
814   PetscCall(DMPlexStratify(dm));
815 
816   { // Create point SF
817     PetscSF sf;
818     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &sf));
819     PetscInt owned_verts = ZCodeFind(layout.zstarts[rank + 1], local_verts, vert_z);
820     if (owned_verts < 0) owned_verts = -(owned_verts + 1); // We don't care whether the key was found
821     PetscInt     num_ghosts = local_verts - owned_verts;   // Due to sorting, owned vertices always come first
822     PetscInt    *local_ghosts;
823     PetscSFNode *ghosts;
824     PetscCall(PetscMalloc1(num_ghosts, &local_ghosts));
825     PetscCall(PetscMalloc1(num_ghosts, &ghosts));
826     for (PetscInt i = 0; i < num_ghosts;) {
827       ZCode       z = vert_z[owned_verts + i];
828       PetscMPIInt remote_rank, remote_count = 0;
829 
830       PetscCall(PetscMPIIntCast(ZCodeFind(z, size + 1, layout.zstarts), &remote_rank));
831       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
832       // We have a new remote rank; find all the ghost indices (which are contiguous in vert_z)
833 
834       // Count the elements on remote_rank
835       PetscInt remote_elem = ZLayoutElementsOnRank(&layout, remote_rank);
836 
837       // Traverse vertices and make ghost links
838       for (ZCode rz = layout.zstarts[remote_rank]; rz < layout.zstarts[remote_rank + 1]; rz++) {
839         Ijk loc = ZCodeSplit(rz);
840         if (rz == z) {
841           local_ghosts[i] = local_elems + owned_verts + i;
842           ghosts[i].rank  = remote_rank;
843           ghosts[i].index = remote_elem + remote_count;
844           i++;
845           if (i == num_ghosts) break;
846           z = vert_z[owned_verts + i];
847         }
848         if (IjkActive(layout.vextent, loc)) remote_count++;
849         else rz += ZStepOct(rz);
850       }
851     }
852     PetscCall(PetscSFSetGraph(sf, local_elems + local_verts, num_ghosts, local_ghosts, PETSC_OWN_POINTER, ghosts, PETSC_OWN_POINTER));
853     PetscCall(PetscObjectSetName((PetscObject)sf, "SFC Point SF"));
854     PetscCall(DMSetPointSF(dm, sf));
855     PetscCall(PetscSFDestroy(&sf));
856   }
857   {
858     Vec          coordinates;
859     PetscScalar *coords;
860     PetscSection coord_section;
861     PetscInt     coord_size;
862     PetscCall(DMGetCoordinateSection(dm, &coord_section));
863     PetscCall(PetscSectionSetNumFields(coord_section, 1));
864     PetscCall(PetscSectionSetFieldComponents(coord_section, 0, dim));
865     PetscCall(PetscSectionSetChart(coord_section, local_elems, local_elems + local_verts));
866     for (PetscInt v = 0; v < local_verts; v++) {
867       PetscInt point = local_elems + v;
868       PetscCall(PetscSectionSetDof(coord_section, point, dim));
869       PetscCall(PetscSectionSetFieldDof(coord_section, point, 0, dim));
870     }
871     PetscCall(PetscSectionSetUp(coord_section));
872     PetscCall(PetscSectionGetStorageSize(coord_section, &coord_size));
873     PetscCall(VecCreate(PETSC_COMM_SELF, &coordinates));
874     PetscCall(PetscObjectSetName((PetscObject)coordinates, "coordinates"));
875     PetscCall(VecSetSizes(coordinates, coord_size, PETSC_DETERMINE));
876     PetscCall(VecSetBlockSize(coordinates, dim));
877     PetscCall(VecSetType(coordinates, VECSTANDARD));
878     PetscCall(VecGetArray(coordinates, &coords));
879     for (PetscInt v = 0; v < local_verts; v++) {
880       Ijk loc             = ZCodeSplit(vert_z[v]);
881       coords[v * dim + 0] = lower[0] + loc.i * (upper[0] - lower[0]) / layout.eextent.i;
882       if (dim > 1) coords[v * dim + 1] = lower[1] + loc.j * (upper[1] - lower[1]) / layout.eextent.j;
883       if (dim > 2) coords[v * dim + 2] = lower[2] + loc.k * (upper[2] - lower[2]) / layout.eextent.k;
884     }
885     PetscCall(VecRestoreArray(coordinates, &coords));
886     PetscCall(DMSetCoordinatesLocal(dm, coordinates));
887     PetscCall(VecDestroy(&coordinates));
888   }
889   if (interpolate) {
890     PetscCall(DMPlexInterpolateInPlace_Internal(dm));
891     // It's currently necessary to orient the donor and periodic edges consistently. An easy way to ensure that is ot
892     // give all edges positive orientation. Since vertices are created in Z-order, all ranks will agree about the
893     // ordering cone[0] < cone[1]. This is overkill and it would be nice to remove this preparation and make
894     // DMPlexCreateIsoperiodicClosureSF_Private() more resilient, so it fixes any inconsistent orientations. That might
895     // be needed in a general CGNS reader, for example.
896     PetscCall(DMPlexOrientPositiveEdges_Private(dm));
897 
898     DMLabel label;
899     PetscCall(DMCreateLabel(dm, "Face Sets"));
900     PetscCall(DMGetLabel(dm, "Face Sets", &label));
901     PetscSegBuffer per_faces[3], donor_face_closure[3], my_donor_faces[3];
902     for (PetscInt i = 0; i < 3; i++) {
903       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &per_faces[i]));
904       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &my_donor_faces[i]));
905       PetscCall(PetscSegBufferCreate(sizeof(ZCode), 64 * PetscPowInt(2, dim), &donor_face_closure[i]));
906     }
907     PetscInt fStart, fEnd, vStart, vEnd;
908     PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
909     PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
910     for (PetscInt f = fStart; f < fEnd; f++) {
911       PetscInt npoints, *points = NULL, num_fverts = 0, fverts[8];
912       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
913       PetscInt bc_count[6] = {0};
914       for (PetscInt i = 0; i < npoints; i++) {
915         PetscInt p = points[2 * i];
916         if (p < vStart || vEnd <= p) continue;
917         fverts[num_fverts++] = p;
918         Ijk loc              = ZCodeSplit(vert_z[p - vStart]);
919         // Convention here matches DMPlexCreateCubeMesh_Internal
920         bc_count[0] += loc.i == 0;
921         bc_count[1] += loc.i == layout.vextent.i - 1;
922         bc_count[2] += loc.j == 0;
923         bc_count[3] += loc.j == layout.vextent.j - 1;
924         bc_count[4] += loc.k == 0;
925         bc_count[5] += loc.k == layout.vextent.k - 1;
926       }
927       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
928       for (PetscInt bc = 0, bc_match = 0; bc < 2 * dim; bc++) {
929         if (bc_count[bc] == PetscPowInt(2, dim - 1)) {
930           PetscCall(DMPlexOrientPoint(dm, f, face_orient_dim[dim][bc]));
931           if (periodicity[bc / 2] == DM_BOUNDARY_PERIODIC) {
932             PetscInt *put;
933             if (bc % 2 == 0) { // donor face; no label
934               PetscCall(PetscSegBufferGet(my_donor_faces[bc / 2], 1, &put));
935               *put = f;
936             } else { // periodic face
937               PetscCall(PetscSegBufferGet(per_faces[bc / 2], 1, &put));
938               *put = f;
939               ZCode *zput;
940               PetscCall(PetscSegBufferGet(donor_face_closure[bc / 2], num_fverts, &zput));
941               for (PetscInt i = 0; i < num_fverts; i++) {
942                 Ijk loc = ZCodeSplit(vert_z[fverts[i] - vStart]);
943                 switch (bc / 2) {
944                 case 0:
945                   loc.i = 0;
946                   break;
947                 case 1:
948                   loc.j = 0;
949                   break;
950                 case 2:
951                   loc.k = 0;
952                   break;
953                 }
954                 *zput++ = ZEncode(loc);
955               }
956             }
957             continue;
958           }
959           PetscAssert(bc_match == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Face matches multiple face sets");
960           PetscCall(DMLabelSetValue(label, f, face_marker_dim[dim][bc]));
961           bc_match++;
962         }
963       }
964     }
965     // Ensure that the Coordinate DM has our new boundary labels
966     DM cdm;
967     PetscCall(DMGetCoordinateDM(dm, &cdm));
968     PetscCall(DMCopyLabels(dm, cdm, PETSC_COPY_VALUES, PETSC_FALSE, DM_COPY_LABELS_FAIL));
969     if (periodicity[0] == DM_BOUNDARY_PERIODIC || (dim > 1 && periodicity[1] == DM_BOUNDARY_PERIODIC) || (dim > 2 && periodicity[2] == DM_BOUNDARY_PERIODIC)) {
970       PetscCall(DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(dm, &layout, vert_z, per_faces, lower, upper, periodicity, donor_face_closure, my_donor_faces));
971     }
972     for (PetscInt i = 0; i < 3; i++) {
973       PetscCall(PetscSegBufferDestroy(&per_faces[i]));
974       PetscCall(PetscSegBufferDestroy(&donor_face_closure[i]));
975       PetscCall(PetscSegBufferDestroy(&my_donor_faces[i]));
976     }
977   }
978   PetscCall(PetscFree(layout.zstarts));
979   PetscCall(PetscFree(vert_z));
980   PetscCall(PetscLogEventEnd(DMPLEX_CreateBoxSFC, dm, 0, 0, 0));
981   PetscFunctionReturn(PETSC_SUCCESS);
982 }
983 
984 /*@
985   DMPlexSetIsoperiodicFaceSF - Express periodicity from an existing mesh
986 
987   Logically Collective
988 
989   Input Parameters:
990 + dm           - The `DMPLEX` on which to set periodicity
991 . num_face_sfs - Number of `PetscSF`s in `face_sfs`
992 - face_sfs     - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.
993 
994   Level: advanced
995 
996   Note:
997   One can use `-dm_plex_shape zbox` to use this mode of periodicity, wherein the periodic points are distinct both globally
998   and locally, but are paired when creating a global dof space.
999 
1000 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexGetIsoperiodicFaceSF()`
1001 @*/
1002 PetscErrorCode DMPlexSetIsoperiodicFaceSF(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs)
1003 {
1004   DM_Plex *plex = (DM_Plex *)dm->data;
1005 
1006   PetscFunctionBegin;
1007   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1008   if (face_sfs) PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex));
1009   if (face_sfs == plex->periodic.face_sfs && num_face_sfs == plex->periodic.num_face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
1010 
1011   for (PetscInt i = 0; i < num_face_sfs; i++) PetscCall(PetscObjectReference((PetscObject)face_sfs[i]));
1012 
1013   if (plex->periodic.num_face_sfs > 0) {
1014     for (PetscInt i = 0; i < plex->periodic.num_face_sfs; i++) PetscCall(PetscSFDestroy(&plex->periodic.face_sfs[i]));
1015     PetscCall(PetscFree(plex->periodic.face_sfs));
1016   }
1017 
1018   plex->periodic.num_face_sfs = num_face_sfs;
1019   PetscCall(PetscCalloc1(num_face_sfs, &plex->periodic.face_sfs));
1020   for (PetscInt i = 0; i < num_face_sfs; i++) plex->periodic.face_sfs[i] = face_sfs[i];
1021 
1022   DM cdm = dm->coordinates[0].dm; // Can't DMGetCoordinateDM because it automatically creates one
1023   if (cdm) {
1024     PetscCall(DMPlexSetIsoperiodicFaceSF(cdm, num_face_sfs, face_sfs));
1025     if (face_sfs) cdm->periodic.setup = DMPeriodicCoordinateSetUp_Internal;
1026   }
1027   PetscFunctionReturn(PETSC_SUCCESS);
1028 }
1029 
1030 /*@C
1031   DMPlexGetIsoperiodicFaceSF - Obtain periodicity for a mesh
1032 
1033   Logically Collective
1034 
1035   Input Parameter:
1036 . dm - The `DMPLEX` for which to obtain periodic relation
1037 
1038   Output Parameters:
1039 + num_face_sfs - Number of `PetscSF`s in the array
1040 - face_sfs     - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.
1041 
1042   Level: advanced
1043 
1044 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()`
1045 @*/
1046 PetscErrorCode DMPlexGetIsoperiodicFaceSF(DM dm, PetscInt *num_face_sfs, const PetscSF **face_sfs)
1047 {
1048   DM_Plex *plex = (DM_Plex *)dm->data;
1049 
1050   PetscFunctionBegin;
1051   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1052   *face_sfs     = plex->periodic.face_sfs;
1053   *num_face_sfs = plex->periodic.num_face_sfs;
1054   PetscFunctionReturn(PETSC_SUCCESS);
1055 }
1056 
1057 /*@C
1058   DMPlexSetIsoperiodicFaceTransform - set geometric transform from donor to periodic points
1059 
1060   Logically Collective
1061 
1062   Input Parameters:
1063 + dm - `DMPLEX` that has been configured with `DMPlexSetIsoperiodicFaceSF()`
1064 . n  - Number of transforms in array
1065 - t  - Array of 4x4 affine transformation basis.
1066 
1067   Level: advanced
1068 
1069   Notes:
1070   Affine transforms are 4x4 matrices in which the leading 3x3 block expresses a rotation (or identity for no rotation),
1071   the last column contains a translation vector, and the bottom row is all zero except the last entry, which must always
1072   be 1. This representation is common in geometric modeling and allows affine transformations to be composed using
1073   simple matrix multiplication.
1074 
1075   Although the interface accepts a general affine transform, only affine translation is supported at present.
1076 
1077   Developer Notes:
1078   This interface should be replaced by making BasisTransform public, expanding it to support affine representations, and
1079   adding GPU implementations to apply the G2L/L2G transforms.
1080 
1081 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()`
1082 @*/
1083 PetscErrorCode DMPlexSetIsoperiodicFaceTransform(DM dm, PetscInt n, const PetscScalar t[])
1084 {
1085   DM_Plex *plex = (DM_Plex *)dm->data;
1086 
1087   PetscFunctionBegin;
1088   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1089   PetscCheck(n == plex->periodic.num_face_sfs, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Number of transforms (%" PetscInt_FMT ") must equal number of isoperiodc face SFs (%" PetscInt_FMT ")", n, plex->periodic.num_face_sfs);
1090 
1091   PetscCall(PetscMalloc1(n, &plex->periodic.transform));
1092   for (PetscInt i = 0; i < n; i++) {
1093     for (PetscInt j = 0; j < 4; j++) {
1094       for (PetscInt k = 0; k < 4; k++) {
1095         PetscCheck(j != k || t[i * 16 + j * 4 + k] == 1., PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Rotated transforms not supported");
1096         plex->periodic.transform[i][j][k] = t[i * 16 + j * 4 + k];
1097       }
1098     }
1099   }
1100   PetscFunctionReturn(PETSC_SUCCESS);
1101 }
1102