xref: /petsc/src/dm/impls/plex/plexsfc.c (revision f69e25f999951fb72c66e099de6982d50099df91)
1 #include <petsc/private/dmpleximpl.h> /*I   "petscdmplex.h"   I*/
2 #include <petscsf.h>
3 #include <petsc/private/hashset.h>
4 
5 typedef uint64_t ZCode;
6 
7 PETSC_HASH_SET(ZSet, ZCode, PetscHash_UInt64, PetscHashEqual)
8 
9 typedef struct {
10   PetscInt i, j, k;
11 } Ijk;
12 
13 typedef struct {
14   Ijk         eextent;
15   Ijk         vextent;
16   PetscMPIInt comm_size;
17   ZCode      *zstarts;
18 } ZLayout;
19 
20 // ***** Overview of ZCode *******
21 // The SFC uses integer indexing for each dimension and encodes them into a single integer by interleaving the bits of each index.
22 // This is known as Morton encoding, and is refered to as ZCode in this code.
23 // So for index i having bits [i2,i1,i0], and similar for indexes j and k, the ZCode (Morton number) would be:
24 //    [k2,j2,i2,k1,j1,i1,k0,j0,i0]
25 // This encoding allows for easier traversal of the SFC structure (see https://en.wikipedia.org/wiki/Z-order_curve and `ZStepOct()`).
26 // `ZEncode()` is used to go from indices to ZCode, while `ZCodeSplit()` goes from ZCode back to indices.
27 
28 // Decodes the leading interleaved index from a ZCode
29 // e.g. [k2,j2,i2,k1,j1,i1,k0,j0,i0] -> [i2,i1,i0]
30 // Magic numbers taken from https://stackoverflow.com/a/18528775/7564988 (translated to octal)
31 static unsigned ZCodeSplit1(ZCode z)
32 {
33   z &= 0111111111111111111111;
34   z = (z | z >> 2) & 0103030303030303030303;
35   z = (z | z >> 4) & 0100170017001700170017;
36   z = (z | z >> 8) & 0370000037700000377;
37   z = (z | z >> 16) & 0370000000000177777;
38   z = (z | z >> 32) & 07777777;
39   return (unsigned)z;
40 }
41 
42 // Encodes the leading interleaved index from a ZCode
43 // e.g. [i2,i1,i0] -> [0,0,i2,0,0,i1,0,0,i0]
44 static ZCode ZEncode1(unsigned t)
45 {
46   ZCode z = t;
47   z &= 07777777;
48   z = (z | z << 32) & 0370000000000177777;
49   z = (z | z << 16) & 0370000037700000377;
50   z = (z | z << 8) & 0100170017001700170017;
51   z = (z | z << 4) & 0103030303030303030303;
52   z = (z | z << 2) & 0111111111111111111111;
53   return z;
54 }
55 
56 // Decodes i j k indices from a ZCode.
57 // Uses `ZCodeSplit1()` by shifting ZCode so that the leading index is the desired one to decode
58 static Ijk ZCodeSplit(ZCode z)
59 {
60   Ijk c;
61   c.i = ZCodeSplit1(z >> 2);
62   c.j = ZCodeSplit1(z >> 1);
63   c.k = ZCodeSplit1(z >> 0);
64   return c;
65 }
66 
67 // Encodes i j k indices to a ZCode.
68 // Uses `ZCodeEncode1()` by shifting resulting ZCode to the appropriate bit placement
69 static ZCode ZEncode(Ijk c)
70 {
71   ZCode z = (ZEncode1((unsigned int)c.i) << 2) | (ZEncode1((unsigned int)c.j) << 1) | ZEncode1((unsigned int)c.k);
72   return z;
73 }
74 
75 static PetscBool IjkActive(Ijk extent, Ijk l)
76 {
77   if (l.i < extent.i && l.j < extent.j && l.k < extent.k) return PETSC_TRUE;
78   return PETSC_FALSE;
79 }
80 
81 // If z is not the base of an octet (last 3 bits 0), return 0.
82 //
83 // If z is the base of an octet, we recursively grow to the biggest structured octet. This is typically useful when a z
84 // is outside the domain and we wish to skip a (possibly recursively large) octet to find our next interesting point.
85 static ZCode ZStepOct(ZCode z)
86 {
87   if (PetscUnlikely(z == 0)) return 0; // Infinite loop below if z == 0
88   ZCode step = 07;
89   for (; (z & step) == 0; step = (step << 3) | 07) { }
90   return step >> 3;
91 }
92 
93 // Since element/vertex box extents are typically not equal powers of 2, Z codes that lie within the domain are not contiguous.
94 static PetscErrorCode ZLayoutCreate(PetscMPIInt size, const PetscInt eextent[3], const PetscInt vextent[3], ZLayout *layout)
95 {
96   PetscFunctionBegin;
97   layout->eextent.i = eextent[0];
98   layout->eextent.j = eextent[1];
99   layout->eextent.k = eextent[2];
100   layout->vextent.i = vextent[0];
101   layout->vextent.j = vextent[1];
102   layout->vextent.k = vextent[2];
103   layout->comm_size = size;
104   layout->zstarts   = NULL;
105   PetscCall(PetscMalloc1(size + 1, &layout->zstarts));
106 
107   PetscInt total_elems = eextent[0] * eextent[1] * eextent[2];
108   ZCode    z           = 0;
109   layout->zstarts[0]   = 0;
110   // This loop traverses all vertices in the global domain, so is worth making fast. We use ZStepBound
111   for (PetscMPIInt r = 0; r < size; r++) {
112     PetscInt elems_needed = (total_elems / size) + (total_elems % size > r), count;
113     for (count = 0; count < elems_needed; z++) {
114       ZCode skip = ZStepOct(z); // optimistically attempt a longer step
115       for (ZCode s = skip;; s >>= 3) {
116         Ijk trial = ZCodeSplit(z + s);
117         if (IjkActive(layout->eextent, trial)) {
118           while (count + s + 1 > (ZCode)elems_needed) s >>= 3; // Shrink the octet
119           count += s + 1;
120           z += s;
121           break;
122         }
123         if (s == 0) { // the whole skip octet is inactive
124           z += skip;
125           break;
126         }
127       }
128     }
129     // Pick up any extra vertices in the Z ordering before the next rank's first owned element.
130     //
131     // This leads to poorly balanced vertices when eextent is a power of 2, since all the fringe vertices end up
132     // on the last rank. A possible solution is to balance the Z-order vertices independently from the cells, which will
133     // result in a lot of element closures being remote. We could finish marking boundary conditions, then do a round of
134     // vertex ownership smoothing (which would reorder and redistribute vertices without touching element distribution).
135     // Another would be to have an analytic ownership criteria for vertices in the fringe veextent - eextent. This would
136     // complicate the job of identifying an owner and its offset.
137     //
138     // The current recommended approach is to let `-dm_distribute 1` (default) resolve vertex ownership. This is
139     // *mandatory* with isoperiodicity (except in special cases) to remove standed vertices from local spaces. Here's
140     // the issue:
141     //
142     // Consider this partition on rank 0 (left) and rank 1.
143     //
144     //    4 --------  5 -- 14 --10 -- 21 --11
145     //                |          |          |
146     // 7 -- 16 --  8  |          |          |
147     // |           |  3 -------  7 -------  9
148     // |           |             |          |
149     // 4 --------  6 ------ 10   |          |
150     // |           |         |   6 -- 16 -- 8
151     // |           |         |
152     // 3 ---11---  5 --18--  9
153     //
154     // The periodic face SF looks like
155     // [0] Number of roots=21, leaves=1, remote ranks=1
156     // [0] 16 <- (0,11)
157     // [1] Number of roots=22, leaves=2, remote ranks=2
158     // [1] 14 <- (0,18)
159     // [1] 21 <- (1,16)
160     //
161     // In handling face (0,16), rank 0 learns that (0,7) and (0,8) map to (0,3) and (0,5) respectively, thus we won't use
162     // the point SF links to (1,4) and (1,5). Rank 1 learns about the periodic mapping of (1,5) while handling face
163     // (1,14), but never learns that vertex (1,4) has been mapped to (0,3) by face (0,16).
164     //
165     // We can relatively easily inform vertex (1,4) of this mapping, but it stays in rank 1's local space despite not
166     // being in the closure and thus not being contributed to. This would be mostly harmless except that some viewer
167     // routines expect all local points to be somehow significant. It is not easy to analytically remove the (1,4)
168     // vertex because the point SF and isoperiodic face SF would need to be updated to account for removal of the
169     // stranded vertices.
170     for (; z <= ZEncode(layout->vextent); z++) {
171       Ijk loc = ZCodeSplit(z);
172       if (IjkActive(layout->eextent, loc)) break;
173       z += ZStepOct(z);
174     }
175     layout->zstarts[r + 1] = z;
176   }
177   layout->zstarts[size] = ZEncode(layout->vextent);
178   PetscFunctionReturn(PETSC_SUCCESS);
179 }
180 
181 static PetscInt ZLayoutElementsOnRank(const ZLayout *layout, PetscMPIInt rank)
182 {
183   PetscInt remote_elem = 0;
184   for (ZCode rz = layout->zstarts[rank]; rz < layout->zstarts[rank + 1]; rz++) {
185     Ijk loc = ZCodeSplit(rz);
186     if (IjkActive(layout->eextent, loc)) remote_elem++;
187     else rz += ZStepOct(rz);
188   }
189   return remote_elem;
190 }
191 
192 static PetscInt ZCodeFind(ZCode key, PetscInt n, const ZCode X[])
193 {
194   PetscInt lo = 0, hi = n;
195 
196   if (n == 0) return -1;
197   while (hi - lo > 1) {
198     PetscInt mid = lo + (hi - lo) / 2;
199     if (key < X[mid]) hi = mid;
200     else lo = mid;
201   }
202   return key == X[lo] ? lo : -(lo + (key > X[lo]) + 1);
203 }
204 
205 static PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(DM dm, const ZLayout *layout, const ZCode *vert_z, PetscSegBuffer per_faces[3], const PetscReal *lower, const PetscReal *upper, const DMBoundaryType *periodicity, PetscSegBuffer donor_face_closure[3], PetscSegBuffer my_donor_faces[3])
206 {
207   MPI_Comm    comm;
208   PetscInt    dim, vStart, vEnd;
209   PetscMPIInt size;
210   PetscSF     face_sfs[3];
211   PetscScalar transforms[3][4][4] = {{{0}}};
212 
213   PetscFunctionBegin;
214   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
215   PetscCallMPI(MPI_Comm_size(comm, &size));
216   PetscCall(DMGetDimension(dm, &dim));
217   const PetscInt csize = PetscPowInt(2, dim - 1);
218   PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
219 
220   PetscInt num_directions = 0;
221   for (PetscInt direction = 0; direction < dim; direction++) {
222     PetscCount   num_faces;
223     PetscInt    *faces;
224     ZCode       *donor_verts, *donor_minz;
225     PetscSFNode *leaf;
226     PetscCount   num_multiroots = 0;
227     PetscInt     pStart, pEnd;
228     PetscBool    sorted;
229     PetscInt     inum_faces;
230 
231     if (periodicity[direction] != DM_BOUNDARY_PERIODIC) continue;
232     PetscCall(PetscSegBufferGetSize(per_faces[direction], &num_faces));
233     PetscCall(PetscSegBufferExtractInPlace(per_faces[direction], &faces));
234     PetscCall(PetscSegBufferExtractInPlace(donor_face_closure[direction], &donor_verts));
235     PetscCall(PetscMalloc1(num_faces, &donor_minz));
236     PetscCall(PetscMalloc1(num_faces, &leaf));
237     for (PetscCount i = 0; i < num_faces; i++) {
238       ZCode minz = donor_verts[i * csize];
239 
240       for (PetscInt j = 1; j < csize; j++) minz = PetscMin(minz, donor_verts[i * csize + j]);
241       donor_minz[i] = minz;
242     }
243     PetscCall(PetscIntCast(num_faces, &inum_faces));
244     PetscCall(PetscSortedInt64(inum_faces, (const PetscInt64 *)donor_minz, &sorted));
245     // If a donor vertex were chosen to broker multiple faces, we would have a logic error.
246     // Checking for sorting is a cheap check that there are no duplicates.
247     PetscCheck(sorted, PETSC_COMM_SELF, PETSC_ERR_PLIB, "minz not sorted; possible duplicates not checked");
248     for (PetscCount i = 0; i < num_faces;) {
249       ZCode       z           = donor_minz[i];
250       PetscMPIInt remote_rank = (PetscMPIInt)ZCodeFind(z, size + 1, layout->zstarts), remote_count = 0;
251 
252       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
253       // Process all the vertices on this rank
254       for (ZCode rz = layout->zstarts[remote_rank]; rz < layout->zstarts[remote_rank + 1]; rz++) {
255         Ijk loc = ZCodeSplit(rz);
256 
257         if (rz == z) {
258           leaf[i].rank  = remote_rank;
259           leaf[i].index = remote_count;
260           i++;
261           if (i == num_faces) break;
262           z = donor_minz[i];
263         }
264         if (IjkActive(layout->vextent, loc)) remote_count++;
265       }
266     }
267     PetscCall(PetscFree(donor_minz));
268     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &face_sfs[num_directions]));
269     PetscCall(PetscSFSetGraph(face_sfs[num_directions], vEnd - vStart, inum_faces, NULL, PETSC_USE_POINTER, leaf, PETSC_USE_POINTER));
270     const PetscInt *my_donor_degree;
271     PetscCall(PetscSFComputeDegreeBegin(face_sfs[num_directions], &my_donor_degree));
272     PetscCall(PetscSFComputeDegreeEnd(face_sfs[num_directions], &my_donor_degree));
273 
274     for (PetscInt i = 0; i < vEnd - vStart; i++) {
275       num_multiroots += my_donor_degree[i];
276       if (my_donor_degree[i] == 0) continue;
277       PetscAssert(my_donor_degree[i] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex has multiple faces");
278     }
279     PetscInt  *my_donors, *donor_indices, *my_donor_indices;
280     PetscCount num_my_donors;
281 
282     PetscCall(PetscSegBufferGetSize(my_donor_faces[direction], &num_my_donors));
283     PetscCheck(num_my_donors == num_multiroots, PETSC_COMM_SELF, PETSC_ERR_SUP, "Donor request (%" PetscCount_FMT ") does not match expected donors (%" PetscCount_FMT ")", num_my_donors, num_multiroots);
284     PetscCall(PetscSegBufferExtractInPlace(my_donor_faces[direction], &my_donors));
285     PetscCall(PetscMalloc1(vEnd - vStart, &my_donor_indices));
286     for (PetscCount i = 0; i < num_my_donors; i++) {
287       PetscInt f = my_donors[i];
288       PetscInt num_points, *points = NULL, minv = PETSC_INT_MAX;
289 
290       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
291       for (PetscInt j = 0; j < num_points; j++) {
292         PetscInt p = points[2 * j];
293         if (p < vStart || vEnd <= p) continue;
294         minv = PetscMin(minv, p);
295       }
296       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
297       PetscAssert(my_donor_degree[minv - vStart] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex not requested");
298       my_donor_indices[minv - vStart] = f;
299     }
300     PetscCall(PetscMalloc1(num_faces, &donor_indices));
301     PetscCall(PetscSFBcastBegin(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
302     PetscCall(PetscSFBcastEnd(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
303     PetscCall(PetscFree(my_donor_indices));
304     // Modify our leafs so they point to donor faces instead of donor minz. Additionally, give them indices as faces.
305     for (PetscCount i = 0; i < num_faces; i++) leaf[i].index = donor_indices[i];
306     PetscCall(PetscFree(donor_indices));
307     PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
308     PetscCall(PetscSFSetGraph(face_sfs[num_directions], pEnd - pStart, inum_faces, faces, PETSC_COPY_VALUES, leaf, PETSC_OWN_POINTER));
309     {
310       char face_sf_name[PETSC_MAX_PATH_LEN];
311       PetscCall(PetscSNPrintf(face_sf_name, sizeof face_sf_name, "Z-order Isoperiodic Faces #%" PetscInt_FMT, num_directions));
312       PetscCall(PetscObjectSetName((PetscObject)face_sfs[num_directions], face_sf_name));
313     }
314 
315     transforms[num_directions][0][0]         = 1;
316     transforms[num_directions][1][1]         = 1;
317     transforms[num_directions][2][2]         = 1;
318     transforms[num_directions][3][3]         = 1;
319     transforms[num_directions][direction][3] = upper[direction] - lower[direction];
320     num_directions++;
321   }
322 
323   PetscCall(DMPlexSetIsoperiodicFaceSF(dm, num_directions, face_sfs));
324   PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, num_directions, (PetscScalar *)transforms));
325 
326   for (PetscInt i = 0; i < num_directions; i++) PetscCall(PetscSFDestroy(&face_sfs[i]));
327   PetscFunctionReturn(PETSC_SUCCESS);
328 }
329 
330 // This is a DMGlobalToLocalHook that applies the affine offsets. When extended for rotated periodicity, it'll need to
331 // apply a rotatonal transform and similar operations will be needed for fields (e.g., to rotate a velocity vector).
332 // We use this crude approach here so we don't have to write new GPU kernels yet.
333 static PetscErrorCode DMCoordAddPeriodicOffsets_Private(DM dm, Vec g, InsertMode mode, Vec l, void *ctx)
334 {
335   PetscFunctionBegin;
336   // These `VecScatter`s should be merged to improve efficiency; the scatters cannot be overlapped.
337   for (PetscInt i = 0; i < dm->periodic.num_affines; i++) {
338     PetscCall(VecScatterBegin(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD));
339     PetscCall(VecScatterEnd(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD));
340   }
341   PetscFunctionReturn(PETSC_SUCCESS);
342 }
343 
344 // Start with an SF for a positive depth (e.g., faces) and create a new SF for matched closure. The caller must ensure
345 // that both the donor (root) face and the periodic (leaf) face have consistent orientation, meaning that their closures
346 // are isomorphic. It may be useful/necessary for this restriction to be loosened.
347 //
348 // Output Arguments:
349 //
350 // + closure_sf - augmented point SF (see `DMGetPointSF()`) that includes the faces and all points in its closure. This
351 //   can be used to create a global section and section SF.
352 // - is_points - array of index sets for just the points in the closure of `face_sf`. These may be used to apply an affine
353 //   transformation to periodic dofs; see DMPeriodicCoordinateSetUp_Internal().
354 //
355 static PetscErrorCode DMPlexCreateIsoperiodicPointSF_Private(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs, PetscSF *closure_sf, IS **is_points)
356 {
357   MPI_Comm           comm;
358   PetscMPIInt        rank;
359   PetscSF            point_sf;
360   PetscInt           nroots, nleaves;
361   const PetscInt    *filocal;
362   const PetscSFNode *firemote;
363 
364   PetscFunctionBegin;
365   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
366   PetscCallMPI(MPI_Comm_rank(comm, &rank));
367   PetscCall(DMGetPointSF(dm, &point_sf)); // Point SF has remote points
368   PetscCall(PetscMalloc1(num_face_sfs, is_points));
369 
370   for (PetscInt f = 0; f < num_face_sfs; f++) {
371     PetscSF   face_sf = face_sfs[f];
372     PetscInt *rootdata, *leafdata;
373 
374     PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, &firemote));
375     PetscCall(PetscCalloc2(2 * nroots, &rootdata, 2 * nroots, &leafdata));
376     for (PetscInt i = 0; i < nleaves; i++) {
377       PetscInt point = filocal[i], cl_size, *closure = NULL;
378       PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
379       leafdata[point] = cl_size - 1;
380       PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
381     }
382     PetscCall(PetscSFReduceBegin(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
383     PetscCall(PetscSFReduceEnd(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
384 
385     PetscInt root_offset = 0;
386     for (PetscInt p = 0; p < nroots; p++) {
387       const PetscInt *donor_dof = rootdata + nroots;
388       if (donor_dof[p] == 0) {
389         rootdata[2 * p]     = -1;
390         rootdata[2 * p + 1] = -1;
391         continue;
392       }
393       PetscInt  cl_size;
394       PetscInt *closure = NULL;
395       PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
396       // cl_size - 1 = points not including self
397       PetscAssert(donor_dof[p] == cl_size - 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Reduced leaf cone sizes do not match root cone sizes");
398       rootdata[2 * p]     = root_offset;
399       rootdata[2 * p + 1] = cl_size - 1;
400       root_offset += cl_size - 1;
401       PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
402     }
403     PetscCall(PetscSFBcastBegin(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
404     PetscCall(PetscSFBcastEnd(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
405     // Count how many leaves we need to communicate the closures
406     PetscInt leaf_offset = 0;
407     for (PetscInt i = 0; i < nleaves; i++) {
408       PetscInt point = filocal[i];
409       if (leafdata[2 * point + 1] < 0) continue;
410       leaf_offset += leafdata[2 * point + 1];
411     }
412 
413     PetscSFNode *closure_leaf;
414     PetscCall(PetscMalloc1(leaf_offset, &closure_leaf));
415     leaf_offset = 0;
416     for (PetscInt i = 0; i < nleaves; i++) {
417       PetscInt point   = filocal[i];
418       PetscInt cl_size = leafdata[2 * point + 1];
419       if (cl_size < 0) continue;
420       for (PetscInt j = 0; j < cl_size; j++) {
421         closure_leaf[leaf_offset].rank  = firemote[i].rank;
422         closure_leaf[leaf_offset].index = leafdata[2 * point] + j;
423         leaf_offset++;
424       }
425     }
426 
427     PetscSF sf_closure;
428     PetscCall(PetscSFCreate(comm, &sf_closure));
429     PetscCall(PetscSFSetGraph(sf_closure, root_offset, leaf_offset, NULL, PETSC_USE_POINTER, closure_leaf, PETSC_OWN_POINTER));
430 
431     PetscSFNode *leaf_donor_closure;
432     { // Pack root buffer with owner for every point in the root cones
433       PetscSFNode       *donor_closure;
434       const PetscInt    *pilocal;
435       const PetscSFNode *piremote;
436       PetscInt           npoints;
437 
438       PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, &pilocal, &piremote));
439       PetscCall(PetscCalloc1(root_offset, &donor_closure));
440       root_offset = 0;
441       for (PetscInt p = 0; p < nroots; p++) {
442         if (rootdata[2 * p] < 0) continue;
443         PetscInt  cl_size;
444         PetscInt *closure = NULL;
445         PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
446         for (PetscInt j = 1; j < cl_size; j++) {
447           PetscInt c = closure[2 * j];
448           if (pilocal) {
449             PetscInt found = -1;
450             if (npoints > 0) PetscCall(PetscFindInt(c, npoints, pilocal, &found));
451             if (found >= 0) {
452               donor_closure[root_offset++] = piremote[found];
453               continue;
454             }
455           }
456           // we own c
457           donor_closure[root_offset].rank  = rank;
458           donor_closure[root_offset].index = c;
459           root_offset++;
460         }
461         PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
462       }
463 
464       PetscCall(PetscMalloc1(leaf_offset, &leaf_donor_closure));
465       PetscCall(PetscSFBcastBegin(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE));
466       PetscCall(PetscSFBcastEnd(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE));
467       PetscCall(PetscSFDestroy(&sf_closure));
468       PetscCall(PetscFree(donor_closure));
469     }
470 
471     PetscSFNode *new_iremote;
472     PetscCall(PetscCalloc1(nroots, &new_iremote));
473     for (PetscInt i = 0; i < nroots; i++) new_iremote[i].rank = -1;
474     // Walk leaves and match vertices
475     leaf_offset = 0;
476     for (PetscInt i = 0; i < nleaves; i++) {
477       PetscInt  point   = filocal[i], cl_size;
478       PetscInt *closure = NULL;
479       PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
480       for (PetscInt j = 1; j < cl_size; j++) { // TODO: should we send donor edge orientations so we can flip for consistency?
481         PetscInt    c  = closure[2 * j];
482         PetscSFNode lc = leaf_donor_closure[leaf_offset];
483         // printf("[%d] face %d.%d: %d ?-- (%d,%d)\n", rank, point, j, c, lc.rank, lc.index);
484         if (new_iremote[c].rank == -1) {
485           new_iremote[c] = lc;
486         } else PetscCheck(new_iremote[c].rank == lc.rank && new_iremote[c].index == lc.index, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Mismatched cone ordering between faces");
487         leaf_offset++;
488       }
489       PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
490     }
491     PetscCall(PetscFree(leaf_donor_closure));
492 
493     // Include face points in closure SF
494     for (PetscInt i = 0; i < nleaves; i++) new_iremote[filocal[i]] = firemote[i];
495     // consolidate leaves
496     PetscInt num_new_leaves = 0;
497     for (PetscInt i = 0; i < nroots; i++) {
498       if (new_iremote[i].rank == -1) continue;
499       new_iremote[num_new_leaves] = new_iremote[i];
500       leafdata[num_new_leaves]    = i;
501       num_new_leaves++;
502     }
503     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, num_new_leaves, leafdata, PETSC_COPY_VALUES, &(*is_points)[f]));
504 
505     PetscSF csf;
506     PetscCall(PetscSFCreate(comm, &csf));
507     PetscCall(PetscSFSetGraph(csf, nroots, num_new_leaves, leafdata, PETSC_COPY_VALUES, new_iremote, PETSC_COPY_VALUES));
508     PetscCall(PetscFree(new_iremote)); // copy and delete because new_iremote is longer than it needs to be
509     PetscCall(PetscFree2(rootdata, leafdata));
510 
511     PetscInt npoints;
512     PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, NULL, NULL));
513     if (npoints < 0) { // empty point_sf
514       *closure_sf = csf;
515     } else {
516       PetscCall(PetscSFMerge(point_sf, csf, closure_sf));
517       PetscCall(PetscSFDestroy(&csf));
518     }
519     if (f > 0) PetscCall(PetscSFDestroy(&point_sf)); // Only destroy if point_sf is from previous calls to PetscSFMerge
520     point_sf = *closure_sf;                          // Use combined point + isoperiodic SF to define point ownership for further face_sf
521   }
522   PetscCall(PetscObjectSetName((PetscObject)*closure_sf, "Composed Periodic Points"));
523   PetscFunctionReturn(PETSC_SUCCESS);
524 }
525 
526 static PetscErrorCode DMGetIsoperiodicPointSF_Plex(DM dm, PetscSF *sf)
527 {
528   DM_Plex *plex = (DM_Plex *)dm->data;
529 
530   PetscFunctionBegin;
531   if (!plex->periodic.composed_sf) PetscCall(DMPlexCreateIsoperiodicPointSF_Private(dm, plex->periodic.num_face_sfs, plex->periodic.face_sfs, &plex->periodic.composed_sf, &plex->periodic.periodic_points));
532   if (sf) *sf = plex->periodic.composed_sf;
533   PetscFunctionReturn(PETSC_SUCCESS);
534 }
535 
536 PetscErrorCode DMPlexMigrateIsoperiodicFaceSF_Internal(DM old_dm, DM dm, PetscSF sf_migration)
537 {
538   DM_Plex    *plex = (DM_Plex *)old_dm->data;
539   PetscSF     sf_point, *new_face_sfs;
540   PetscMPIInt rank;
541 
542   PetscFunctionBegin;
543   if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
544   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
545   PetscCall(DMGetPointSF(dm, &sf_point));
546   PetscCall(PetscMalloc1(plex->periodic.num_face_sfs, &new_face_sfs));
547 
548   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) {
549     PetscInt           old_npoints, new_npoints, old_nleaf, new_nleaf, point_nleaf;
550     PetscSFNode       *new_leafdata, *rootdata, *leafdata;
551     const PetscInt    *old_local, *point_local;
552     const PetscSFNode *old_remote, *point_remote;
553 
554     PetscCall(PetscSFGetGraph(plex->periodic.face_sfs[f], &old_npoints, &old_nleaf, &old_local, &old_remote));
555     PetscCall(PetscSFGetGraph(sf_migration, NULL, &new_nleaf, NULL, NULL));
556     PetscCall(PetscSFGetGraph(sf_point, &new_npoints, &point_nleaf, &point_local, &point_remote));
557     PetscAssert(new_nleaf == new_npoints, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected migration leaf space to match new point root space");
558     PetscCall(PetscMalloc3(old_npoints, &rootdata, old_npoints, &leafdata, new_npoints, &new_leafdata));
559 
560     // Fill new_leafdata with new owners of all points
561     for (PetscInt i = 0; i < new_npoints; i++) {
562       new_leafdata[i].rank  = rank;
563       new_leafdata[i].index = i;
564     }
565     for (PetscInt i = 0; i < point_nleaf; i++) {
566       PetscInt j      = point_local[i];
567       new_leafdata[j] = point_remote[i];
568     }
569     // REPLACE is okay because every leaf agrees about the new owners
570     PetscCall(PetscSFReduceBegin(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE));
571     PetscCall(PetscSFReduceEnd(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE));
572     // rootdata now contains the new owners
573 
574     // Send to leaves of old space
575     for (PetscInt i = 0; i < old_npoints; i++) {
576       leafdata[i].rank  = -1;
577       leafdata[i].index = -1;
578     }
579     PetscCall(PetscSFBcastBegin(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE));
580     PetscCall(PetscSFBcastEnd(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE));
581 
582     // Send to new leaf space
583     PetscCall(PetscSFBcastBegin(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE));
584     PetscCall(PetscSFBcastEnd(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE));
585 
586     PetscInt     nface = 0, *new_local;
587     PetscSFNode *new_remote;
588     for (PetscInt i = 0; i < new_npoints; i++) nface += (new_leafdata[i].rank >= 0);
589     PetscCall(PetscMalloc1(nface, &new_local));
590     PetscCall(PetscMalloc1(nface, &new_remote));
591     nface = 0;
592     for (PetscInt i = 0; i < new_npoints; i++) {
593       if (new_leafdata[i].rank == -1) continue;
594       new_local[nface]  = i;
595       new_remote[nface] = new_leafdata[i];
596       nface++;
597     }
598     PetscCall(PetscFree3(rootdata, leafdata, new_leafdata));
599     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &new_face_sfs[f]));
600     PetscCall(PetscSFSetGraph(new_face_sfs[f], new_npoints, nface, new_local, PETSC_OWN_POINTER, new_remote, PETSC_OWN_POINTER));
601     {
602       char new_face_sf_name[PETSC_MAX_PATH_LEN];
603       PetscCall(PetscSNPrintf(new_face_sf_name, sizeof new_face_sf_name, "Migrated Isoperiodic Faces #%" PetscInt_FMT, f));
604       PetscCall(PetscObjectSetName((PetscObject)new_face_sfs[f], new_face_sf_name));
605     }
606   }
607 
608   PetscCall(DMPlexSetIsoperiodicFaceSF(dm, plex->periodic.num_face_sfs, new_face_sfs));
609   PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, plex->periodic.num_face_sfs, (PetscScalar *)plex->periodic.transform));
610   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) PetscCall(PetscSFDestroy(&new_face_sfs[f]));
611   PetscCall(PetscFree(new_face_sfs));
612   PetscFunctionReturn(PETSC_SUCCESS);
613 }
614 
615 PetscErrorCode DMPeriodicCoordinateSetUp_Internal(DM dm)
616 {
617   DM_Plex   *plex = (DM_Plex *)dm->data;
618   PetscCount count;
619   IS         isdof;
620   PetscInt   dim;
621 
622   PetscFunctionBegin;
623   if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
624   PetscCall(DMGetIsoperiodicPointSF_Plex(dm, NULL));
625   PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex));
626 
627   PetscCall(DMGetDimension(dm, &dim));
628   dm->periodic.num_affines = plex->periodic.num_face_sfs;
629   PetscCall(PetscMalloc2(dm->periodic.num_affines, &dm->periodic.affine_to_local, dm->periodic.num_affines, &dm->periodic.affine));
630 
631   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) {
632     PetscInt        npoints, vsize, isize;
633     const PetscInt *points;
634     IS              is = plex->periodic.periodic_points[f];
635     PetscSegBuffer  seg;
636     PetscSection    section;
637     PetscInt       *ind;
638     Vec             L, P;
639     VecType         vec_type;
640     VecScatter      scatter;
641     PetscScalar    *x;
642 
643     PetscCall(DMGetLocalSection(dm, &section));
644     PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 32, &seg));
645     PetscCall(ISGetSize(is, &npoints));
646     PetscCall(ISGetIndices(is, &points));
647     for (PetscInt i = 0; i < npoints; i++) {
648       PetscInt point = points[i], off, dof;
649 
650       PetscCall(PetscSectionGetOffset(section, point, &off));
651       PetscCall(PetscSectionGetDof(section, point, &dof));
652       PetscAssert(dof % dim == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected dof %" PetscInt_FMT " not divisible by dimension %" PetscInt_FMT, dof, dim);
653       for (PetscInt j = 0; j < dof / dim; j++) {
654         PetscInt *slot;
655 
656         PetscCall(PetscSegBufferGetInts(seg, 1, &slot));
657         *slot = off / dim + j;
658       }
659     }
660     PetscCall(PetscSegBufferGetSize(seg, &count));
661     PetscCall(PetscSegBufferExtractAlloc(seg, &ind));
662     PetscCall(PetscSegBufferDestroy(&seg));
663     PetscCall(PetscIntCast(count, &isize));
664     PetscCall(ISCreateBlock(PETSC_COMM_SELF, dim, isize, ind, PETSC_OWN_POINTER, &isdof));
665 
666     PetscCall(PetscIntCast(count * dim, &vsize));
667     PetscCall(DMGetLocalVector(dm, &L));
668     PetscCall(VecCreate(PETSC_COMM_SELF, &P));
669     PetscCall(VecSetSizes(P, vsize, vsize));
670     PetscCall(VecGetType(L, &vec_type));
671     PetscCall(VecSetType(P, vec_type));
672     PetscCall(VecScatterCreate(P, NULL, L, isdof, &scatter));
673     PetscCall(DMRestoreLocalVector(dm, &L));
674     PetscCall(ISDestroy(&isdof));
675 
676     PetscCall(VecGetArrayWrite(P, &x));
677     for (PetscCount i = 0; i < count; i++) {
678       for (PetscInt j = 0; j < dim; j++) x[i * dim + j] = plex->periodic.transform[f][j][3];
679     }
680     PetscCall(VecRestoreArrayWrite(P, &x));
681 
682     dm->periodic.affine_to_local[f] = scatter;
683     dm->periodic.affine[f]          = P;
684   }
685   PetscCall(DMGlobalToLocalHookAdd(dm, NULL, DMCoordAddPeriodicOffsets_Private, NULL));
686   PetscFunctionReturn(PETSC_SUCCESS);
687 }
688 
689 // We'll just orient all the edges, though only periodic boundary edges need orientation
690 static PetscErrorCode DMPlexOrientPositiveEdges_Private(DM dm)
691 {
692   PetscInt dim, eStart, eEnd;
693 
694   PetscFunctionBegin;
695   PetscCall(DMGetDimension(dm, &dim));
696   if (dim < 3) PetscFunctionReturn(PETSC_SUCCESS); // not necessary
697   PetscCall(DMPlexGetDepthStratum(dm, 1, &eStart, &eEnd));
698   for (PetscInt e = eStart; e < eEnd; e++) {
699     const PetscInt *cone;
700     PetscCall(DMPlexGetCone(dm, e, &cone));
701     if (cone[0] > cone[1]) PetscCall(DMPlexOrientPoint(dm, e, -1));
702   }
703   PetscFunctionReturn(PETSC_SUCCESS);
704 }
705 
706 PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Internal(DM dm, PetscInt dim, const PetscInt faces[], const PetscReal lower[], const PetscReal upper[], const DMBoundaryType periodicity[], PetscBool interpolate)
707 {
708   PetscInt  eextent[3] = {1, 1, 1}, vextent[3] = {1, 1, 1};
709   const Ijk closure_1[] = {
710     {0, 0, 0},
711     {1, 0, 0},
712   };
713   const Ijk closure_2[] = {
714     {0, 0, 0},
715     {1, 0, 0},
716     {1, 1, 0},
717     {0, 1, 0},
718   };
719   const Ijk closure_3[] = {
720     {0, 0, 0},
721     {0, 1, 0},
722     {1, 1, 0},
723     {1, 0, 0},
724     {0, 0, 1},
725     {1, 0, 1},
726     {1, 1, 1},
727     {0, 1, 1},
728   };
729   const Ijk *const closure_dim[] = {NULL, closure_1, closure_2, closure_3};
730   // This must be kept consistent with DMPlexCreateCubeMesh_Internal
731   const PetscInt        face_marker_1[]   = {1, 2};
732   const PetscInt        face_marker_2[]   = {4, 2, 1, 3};
733   const PetscInt        face_marker_3[]   = {6, 5, 3, 4, 1, 2};
734   const PetscInt *const face_marker_dim[] = {NULL, face_marker_1, face_marker_2, face_marker_3};
735   // Orient faces so the normal is in the positive axis and the first vertex is the one closest to zero.
736   // These orientations can be determined by examining cones of a reference quad and hex element.
737   const PetscInt        face_orient_1[]   = {0, 0};
738   const PetscInt        face_orient_2[]   = {-1, 0, 0, -1};
739   const PetscInt        face_orient_3[]   = {-2, 0, -2, 1, -2, 0};
740   const PetscInt *const face_orient_dim[] = {NULL, face_orient_1, face_orient_2, face_orient_3};
741 
742   PetscFunctionBegin;
743   PetscCall(PetscLogEventBegin(DMPLEX_CreateBoxSFC, dm, 0, 0, 0));
744   PetscAssertPointer(dm, 1);
745   PetscValidLogicalCollectiveInt(dm, dim, 2);
746   PetscCall(DMSetDimension(dm, dim));
747   PetscMPIInt rank, size;
748   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size));
749   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
750   for (PetscInt i = 0; i < dim; i++) {
751     eextent[i] = faces[i];
752     vextent[i] = faces[i] + 1;
753   }
754   ZLayout layout;
755   PetscCall(ZLayoutCreate(size, eextent, vextent, &layout));
756   PetscZSet vset; // set of all vertices in the closure of the owned elements
757   PetscCall(PetscZSetCreate(&vset));
758   PetscInt local_elems = 0;
759   for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
760     Ijk loc = ZCodeSplit(z);
761     if (IjkActive(layout.vextent, loc)) PetscCall(PetscZSetAdd(vset, z));
762     else {
763       z += ZStepOct(z);
764       continue;
765     }
766     if (IjkActive(layout.eextent, loc)) {
767       local_elems++;
768       // Add all neighboring vertices to set
769       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
770         Ijk   inc  = closure_dim[dim][n];
771         Ijk   nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
772         ZCode v    = ZEncode(nloc);
773         PetscCall(PetscZSetAdd(vset, v));
774       }
775     }
776   }
777   PetscInt local_verts, off = 0;
778   ZCode   *vert_z;
779   PetscCall(PetscZSetGetSize(vset, &local_verts));
780   PetscCall(PetscMalloc1(local_verts, &vert_z));
781   PetscCall(PetscZSetGetElems(vset, &off, vert_z));
782   PetscCall(PetscZSetDestroy(&vset));
783   // ZCode is unsigned for bitwise convenience, but highest bit should never be set, so can interpret as signed
784   PetscCall(PetscSortInt64(local_verts, (PetscInt64 *)vert_z));
785 
786   PetscCall(DMPlexSetChart(dm, 0, local_elems + local_verts));
787   for (PetscInt e = 0; e < local_elems; e++) PetscCall(DMPlexSetConeSize(dm, e, PetscPowInt(2, dim)));
788   PetscCall(DMSetUp(dm));
789   {
790     PetscInt e = 0;
791     for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
792       Ijk loc = ZCodeSplit(z);
793       if (!IjkActive(layout.eextent, loc)) {
794         z += ZStepOct(z);
795         continue;
796       }
797       PetscInt cone[8], orient[8] = {0};
798       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
799         Ijk      inc  = closure_dim[dim][n];
800         Ijk      nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
801         ZCode    v    = ZEncode(nloc);
802         PetscInt ci   = ZCodeFind(v, local_verts, vert_z);
803         PetscAssert(ci >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Did not find neighbor vertex in set");
804         cone[n] = local_elems + ci;
805       }
806       PetscCall(DMPlexSetCone(dm, e, cone));
807       PetscCall(DMPlexSetConeOrientation(dm, e, orient));
808       e++;
809     }
810   }
811 
812   PetscCall(DMPlexSymmetrize(dm));
813   PetscCall(DMPlexStratify(dm));
814 
815   { // Create point SF
816     PetscSF sf;
817     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &sf));
818     PetscInt owned_verts = ZCodeFind(layout.zstarts[rank + 1], local_verts, vert_z);
819     if (owned_verts < 0) owned_verts = -(owned_verts + 1); // We don't care whether the key was found
820     PetscInt     num_ghosts = local_verts - owned_verts;   // Due to sorting, owned vertices always come first
821     PetscInt    *local_ghosts;
822     PetscSFNode *ghosts;
823     PetscCall(PetscMalloc1(num_ghosts, &local_ghosts));
824     PetscCall(PetscMalloc1(num_ghosts, &ghosts));
825     for (PetscInt i = 0; i < num_ghosts;) {
826       ZCode       z           = vert_z[owned_verts + i];
827       PetscMPIInt remote_rank = (PetscMPIInt)ZCodeFind(z, size + 1, layout.zstarts), remote_count = 0;
828       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
829       // We have a new remote rank; find all the ghost indices (which are contiguous in vert_z)
830 
831       // Count the elements on remote_rank
832       PetscInt remote_elem = ZLayoutElementsOnRank(&layout, remote_rank);
833 
834       // Traverse vertices and make ghost links
835       for (ZCode rz = layout.zstarts[remote_rank]; rz < layout.zstarts[remote_rank + 1]; rz++) {
836         Ijk loc = ZCodeSplit(rz);
837         if (rz == z) {
838           local_ghosts[i] = local_elems + owned_verts + i;
839           ghosts[i].rank  = remote_rank;
840           ghosts[i].index = remote_elem + remote_count;
841           i++;
842           if (i == num_ghosts) break;
843           z = vert_z[owned_verts + i];
844         }
845         if (IjkActive(layout.vextent, loc)) remote_count++;
846         else rz += ZStepOct(rz);
847       }
848     }
849     PetscCall(PetscSFSetGraph(sf, local_elems + local_verts, num_ghosts, local_ghosts, PETSC_OWN_POINTER, ghosts, PETSC_OWN_POINTER));
850     PetscCall(PetscObjectSetName((PetscObject)sf, "SFC Point SF"));
851     PetscCall(DMSetPointSF(dm, sf));
852     PetscCall(PetscSFDestroy(&sf));
853   }
854   {
855     Vec          coordinates;
856     PetscScalar *coords;
857     PetscSection coord_section;
858     PetscInt     coord_size;
859     PetscCall(DMGetCoordinateSection(dm, &coord_section));
860     PetscCall(PetscSectionSetNumFields(coord_section, 1));
861     PetscCall(PetscSectionSetFieldComponents(coord_section, 0, dim));
862     PetscCall(PetscSectionSetChart(coord_section, local_elems, local_elems + local_verts));
863     for (PetscInt v = 0; v < local_verts; v++) {
864       PetscInt point = local_elems + v;
865       PetscCall(PetscSectionSetDof(coord_section, point, dim));
866       PetscCall(PetscSectionSetFieldDof(coord_section, point, 0, dim));
867     }
868     PetscCall(PetscSectionSetUp(coord_section));
869     PetscCall(PetscSectionGetStorageSize(coord_section, &coord_size));
870     PetscCall(VecCreate(PETSC_COMM_SELF, &coordinates));
871     PetscCall(PetscObjectSetName((PetscObject)coordinates, "coordinates"));
872     PetscCall(VecSetSizes(coordinates, coord_size, PETSC_DETERMINE));
873     PetscCall(VecSetBlockSize(coordinates, dim));
874     PetscCall(VecSetType(coordinates, VECSTANDARD));
875     PetscCall(VecGetArray(coordinates, &coords));
876     for (PetscInt v = 0; v < local_verts; v++) {
877       Ijk loc             = ZCodeSplit(vert_z[v]);
878       coords[v * dim + 0] = lower[0] + loc.i * (upper[0] - lower[0]) / layout.eextent.i;
879       if (dim > 1) coords[v * dim + 1] = lower[1] + loc.j * (upper[1] - lower[1]) / layout.eextent.j;
880       if (dim > 2) coords[v * dim + 2] = lower[2] + loc.k * (upper[2] - lower[2]) / layout.eextent.k;
881     }
882     PetscCall(VecRestoreArray(coordinates, &coords));
883     PetscCall(DMSetCoordinatesLocal(dm, coordinates));
884     PetscCall(VecDestroy(&coordinates));
885   }
886   if (interpolate) {
887     PetscCall(DMPlexInterpolateInPlace_Internal(dm));
888     // It's currently necessary to orient the donor and periodic edges consistently. An easy way to ensure that is ot
889     // give all edges positive orientation. Since vertices are created in Z-order, all ranks will agree about the
890     // ordering cone[0] < cone[1]. This is overkill and it would be nice to remove this preparation and make
891     // DMPlexCreateIsoperiodicClosureSF_Private() more resilient, so it fixes any inconsistent orientations. That might
892     // be needed in a general CGNS reader, for example.
893     PetscCall(DMPlexOrientPositiveEdges_Private(dm));
894 
895     DMLabel label;
896     PetscCall(DMCreateLabel(dm, "Face Sets"));
897     PetscCall(DMGetLabel(dm, "Face Sets", &label));
898     PetscSegBuffer per_faces[3], donor_face_closure[3], my_donor_faces[3];
899     for (PetscInt i = 0; i < 3; i++) {
900       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &per_faces[i]));
901       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &my_donor_faces[i]));
902       PetscCall(PetscSegBufferCreate(sizeof(ZCode), 64 * PetscPowInt(2, dim), &donor_face_closure[i]));
903     }
904     PetscInt fStart, fEnd, vStart, vEnd;
905     PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
906     PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
907     for (PetscInt f = fStart; f < fEnd; f++) {
908       PetscInt npoints, *points = NULL, num_fverts = 0, fverts[8];
909       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
910       PetscInt bc_count[6] = {0};
911       for (PetscInt i = 0; i < npoints; i++) {
912         PetscInt p = points[2 * i];
913         if (p < vStart || vEnd <= p) continue;
914         fverts[num_fverts++] = p;
915         Ijk loc              = ZCodeSplit(vert_z[p - vStart]);
916         // Convention here matches DMPlexCreateCubeMesh_Internal
917         bc_count[0] += loc.i == 0;
918         bc_count[1] += loc.i == layout.vextent.i - 1;
919         bc_count[2] += loc.j == 0;
920         bc_count[3] += loc.j == layout.vextent.j - 1;
921         bc_count[4] += loc.k == 0;
922         bc_count[5] += loc.k == layout.vextent.k - 1;
923       }
924       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
925       for (PetscInt bc = 0, bc_match = 0; bc < 2 * dim; bc++) {
926         if (bc_count[bc] == PetscPowInt(2, dim - 1)) {
927           PetscCall(DMPlexOrientPoint(dm, f, face_orient_dim[dim][bc]));
928           if (periodicity[bc / 2] == DM_BOUNDARY_PERIODIC) {
929             PetscInt *put;
930             if (bc % 2 == 0) { // donor face; no label
931               PetscCall(PetscSegBufferGet(my_donor_faces[bc / 2], 1, &put));
932               *put = f;
933             } else { // periodic face
934               PetscCall(PetscSegBufferGet(per_faces[bc / 2], 1, &put));
935               *put = f;
936               ZCode *zput;
937               PetscCall(PetscSegBufferGet(donor_face_closure[bc / 2], num_fverts, &zput));
938               for (PetscInt i = 0; i < num_fverts; i++) {
939                 Ijk loc = ZCodeSplit(vert_z[fverts[i] - vStart]);
940                 switch (bc / 2) {
941                 case 0:
942                   loc.i = 0;
943                   break;
944                 case 1:
945                   loc.j = 0;
946                   break;
947                 case 2:
948                   loc.k = 0;
949                   break;
950                 }
951                 *zput++ = ZEncode(loc);
952               }
953             }
954             continue;
955           }
956           PetscAssert(bc_match == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Face matches multiple face sets");
957           PetscCall(DMLabelSetValue(label, f, face_marker_dim[dim][bc]));
958           bc_match++;
959         }
960       }
961     }
962     // Ensure that the Coordinate DM has our new boundary labels
963     DM cdm;
964     PetscCall(DMGetCoordinateDM(dm, &cdm));
965     PetscCall(DMCopyLabels(dm, cdm, PETSC_COPY_VALUES, PETSC_FALSE, DM_COPY_LABELS_FAIL));
966     if (periodicity[0] == DM_BOUNDARY_PERIODIC || (dim > 1 && periodicity[1] == DM_BOUNDARY_PERIODIC) || (dim > 2 && periodicity[2] == DM_BOUNDARY_PERIODIC)) {
967       PetscCall(DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(dm, &layout, vert_z, per_faces, lower, upper, periodicity, donor_face_closure, my_donor_faces));
968     }
969     for (PetscInt i = 0; i < 3; i++) {
970       PetscCall(PetscSegBufferDestroy(&per_faces[i]));
971       PetscCall(PetscSegBufferDestroy(&donor_face_closure[i]));
972       PetscCall(PetscSegBufferDestroy(&my_donor_faces[i]));
973     }
974   }
975   PetscCall(PetscFree(layout.zstarts));
976   PetscCall(PetscFree(vert_z));
977   PetscCall(PetscLogEventEnd(DMPLEX_CreateBoxSFC, dm, 0, 0, 0));
978   PetscFunctionReturn(PETSC_SUCCESS);
979 }
980 
981 /*@
982   DMPlexSetIsoperiodicFaceSF - Express periodicity from an existing mesh
983 
984   Logically Collective
985 
986   Input Parameters:
987 + dm           - The `DMPLEX` on which to set periodicity
988 . num_face_sfs - Number of `PetscSF`s in `face_sfs`
989 - face_sfs     - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.
990 
991   Level: advanced
992 
993   Note:
994   One can use `-dm_plex_shape zbox` to use this mode of periodicity, wherein the periodic points are distinct both globally
995   and locally, but are paired when creating a global dof space.
996 
997 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexGetIsoperiodicFaceSF()`
998 @*/
999 PetscErrorCode DMPlexSetIsoperiodicFaceSF(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs)
1000 {
1001   DM_Plex *plex = (DM_Plex *)dm->data;
1002 
1003   PetscFunctionBegin;
1004   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1005   if (face_sfs) PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex));
1006   if (face_sfs == plex->periodic.face_sfs && num_face_sfs == plex->periodic.num_face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
1007 
1008   for (PetscInt i = 0; i < num_face_sfs; i++) PetscCall(PetscObjectReference((PetscObject)face_sfs[i]));
1009 
1010   if (plex->periodic.num_face_sfs > 0) {
1011     for (PetscInt i = 0; i < plex->periodic.num_face_sfs; i++) PetscCall(PetscSFDestroy(&plex->periodic.face_sfs[i]));
1012     PetscCall(PetscFree(plex->periodic.face_sfs));
1013   }
1014 
1015   plex->periodic.num_face_sfs = num_face_sfs;
1016   PetscCall(PetscCalloc1(num_face_sfs, &plex->periodic.face_sfs));
1017   for (PetscInt i = 0; i < num_face_sfs; i++) plex->periodic.face_sfs[i] = face_sfs[i];
1018 
1019   DM cdm = dm->coordinates[0].dm; // Can't DMGetCoordinateDM because it automatically creates one
1020   if (cdm) {
1021     PetscCall(DMPlexSetIsoperiodicFaceSF(cdm, num_face_sfs, face_sfs));
1022     if (face_sfs) cdm->periodic.setup = DMPeriodicCoordinateSetUp_Internal;
1023   }
1024   PetscFunctionReturn(PETSC_SUCCESS);
1025 }
1026 
1027 /*@C
1028   DMPlexGetIsoperiodicFaceSF - Obtain periodicity for a mesh
1029 
1030   Logically Collective
1031 
1032   Input Parameter:
1033 . dm - The `DMPLEX` for which to obtain periodic relation
1034 
1035   Output Parameters:
1036 + num_face_sfs - Number of `PetscSF`s in the array
1037 - face_sfs     - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.
1038 
1039   Level: advanced
1040 
1041 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()`
1042 @*/
1043 PetscErrorCode DMPlexGetIsoperiodicFaceSF(DM dm, PetscInt *num_face_sfs, const PetscSF **face_sfs)
1044 {
1045   DM_Plex *plex = (DM_Plex *)dm->data;
1046 
1047   PetscFunctionBegin;
1048   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1049   *face_sfs     = plex->periodic.face_sfs;
1050   *num_face_sfs = plex->periodic.num_face_sfs;
1051   PetscFunctionReturn(PETSC_SUCCESS);
1052 }
1053 
1054 /*@C
1055   DMPlexSetIsoperiodicFaceTransform - set geometric transform from donor to periodic points
1056 
1057   Logically Collective
1058 
1059   Input Parameters:
1060 + dm - `DMPLEX` that has been configured with `DMPlexSetIsoperiodicFaceSF()`
1061 . n  - Number of transforms in array
1062 - t  - Array of 4x4 affine transformation basis.
1063 
1064   Level: advanced
1065 
1066   Notes:
1067   Affine transforms are 4x4 matrices in which the leading 3x3 block expresses a rotation (or identity for no rotation),
1068   the last column contains a translation vector, and the bottom row is all zero except the last entry, which must always
1069   be 1. This representation is common in geometric modeling and allows affine transformations to be composed using
1070   simple matrix multiplication.
1071 
1072   Although the interface accepts a general affine transform, only affine translation is supported at present.
1073 
1074   Developer Notes:
1075   This interface should be replaced by making BasisTransform public, expanding it to support affine representations, and
1076   adding GPU implementations to apply the G2L/L2G transforms.
1077 
1078 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()`
1079 @*/
1080 PetscErrorCode DMPlexSetIsoperiodicFaceTransform(DM dm, PetscInt n, const PetscScalar t[])
1081 {
1082   DM_Plex *plex = (DM_Plex *)dm->data;
1083 
1084   PetscFunctionBegin;
1085   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
1086   PetscCheck(n == plex->periodic.num_face_sfs, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Number of transforms (%" PetscInt_FMT ") must equal number of isoperiodc face SFs (%" PetscInt_FMT ")", n, plex->periodic.num_face_sfs);
1087 
1088   PetscCall(PetscMalloc1(n, &plex->periodic.transform));
1089   for (PetscInt i = 0; i < n; i++) {
1090     for (PetscInt j = 0; j < 4; j++) {
1091       for (PetscInt k = 0; k < 4; k++) {
1092         PetscCheck(j != k || t[i * 16 + j * 4 + k] == 1., PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Rotated transforms not supported");
1093         plex->periodic.transform[i][j][k] = t[i * 16 + j * 4 + k];
1094       }
1095     }
1096   }
1097   PetscFunctionReturn(PETSC_SUCCESS);
1098 }
1099