xref: /petsc/src/dm/impls/plex/plexsfc.c (revision 6725e60d8c32db1e3b1b8876c5777b87fd0aba60)
1 #include <petsc/private/dmpleximpl.h> /*I   "petscdmplex.h"   I*/
2 #include <petscsf.h>
3 #include <petsc/private/hashset.h>
4 
5 typedef uint64_t ZCode;
6 
7 PETSC_HASH_SET(ZSet, ZCode, PetscHash_UInt64, PetscHashEqual)
8 
9 typedef struct {
10   PetscInt i, j, k;
11 } Ijk;
12 
13 typedef struct {
14   Ijk         eextent;
15   Ijk         vextent;
16   PetscMPIInt comm_size;
17   ZCode      *zstarts;
18 } ZLayout;
19 
20 static unsigned ZCodeSplit1(ZCode z)
21 {
22   z = ((z & 01001001001001001) | ((z >> 2) & 02002002002002002) | ((z >> 4) & 04004004004004004));
23   z = (z | (z >> 6) | (z >> 12)) & 0000000777000000777;
24   z = (z | (z >> 18)) & 0777777;
25   return (unsigned)z;
26 }
27 
28 static ZCode ZEncode1(unsigned t)
29 {
30   ZCode z = t;
31   z       = (z | (z << 18)) & 0777000000777;
32   z       = (z | (z << 6) | (z << 12)) & 07007007007007007;
33   z       = (z | (z << 2) | (z << 4)) & 0111111111111111111;
34   return z;
35 }
36 
37 static Ijk ZCodeSplit(ZCode z)
38 {
39   Ijk c;
40   c.i = ZCodeSplit1(z >> 2);
41   c.j = ZCodeSplit1(z >> 1);
42   c.k = ZCodeSplit1(z >> 0);
43   return c;
44 }
45 
46 static ZCode ZEncode(Ijk c)
47 {
48   ZCode z = (ZEncode1(c.i) << 2) | (ZEncode1(c.j) << 1) | ZEncode1(c.k);
49   return z;
50 }
51 
52 static PetscBool IjkActive(Ijk extent, Ijk l)
53 {
54   if (l.i < extent.i && l.j < extent.j && l.k < extent.k) return PETSC_TRUE;
55   return PETSC_FALSE;
56 }
57 
58 // Since element/vertex box extents are typically not equal powers of 2, Z codes that lie within the domain are not contiguous.
59 static PetscErrorCode ZLayoutCreate(PetscMPIInt size, const PetscInt eextent[3], const PetscInt vextent[3], ZLayout *zlayout)
60 {
61   ZLayout layout;
62 
63   PetscFunctionBegin;
64   layout.eextent.i = eextent[0];
65   layout.eextent.j = eextent[1];
66   layout.eextent.k = eextent[2];
67   layout.vextent.i = vextent[0];
68   layout.vextent.j = vextent[1];
69   layout.vextent.k = vextent[2];
70   layout.comm_size = size;
71   PetscCall(PetscMalloc1(size + 1, &layout.zstarts));
72 
73   PetscInt total_elems = eextent[0] * eextent[1] * eextent[2];
74   ZCode    z           = 0;
75   layout.zstarts[0]    = 0;
76   for (PetscMPIInt r = 0; r < size; r++) {
77     PetscInt elems_needed = (total_elems / size) + (total_elems % size > r), count;
78     for (count = 0; count < elems_needed; z++) {
79       Ijk loc = ZCodeSplit(z);
80       if (IjkActive(layout.eextent, loc)) count++;
81     }
82     // Pick up any extra vertices in the Z ordering before the next rank's first owned element.
83     //
84     // TODO: This leads to poorly balanced vertices when eextent is a power of 2, since all the fringe vertices end up
85     // on the last rank. A possible solution is to balance the Z-order vertices independently from the cells, which will
86     // result in a lot of element closures being remote. We could finish marking boundary conditions, then do a round of
87     // vertex ownership smoothing (which would reorder and redistribute vertices without touching element distribution).
88     // Another would be to have an analytic ownership criteria for vertices in the fringe veextent - eextent. This would
89     // complicate the job of identifying an owner and its offset.
90     for (; z <= ZEncode(layout.vextent); z++) {
91       Ijk loc = ZCodeSplit(z);
92       if (IjkActive(layout.eextent, loc)) break;
93     }
94     layout.zstarts[r + 1] = z;
95   }
96   *zlayout = layout;
97   PetscFunctionReturn(0);
98 }
99 
100 static PetscInt ZLayoutElementsOnRank(const ZLayout *layout, PetscMPIInt rank)
101 {
102   PetscInt remote_elem = 0;
103   for (ZCode rz = layout->zstarts[rank]; rz < layout->zstarts[rank + 1]; rz++) {
104     Ijk loc = ZCodeSplit(rz);
105     if (IjkActive(layout->eextent, loc)) remote_elem++;
106   }
107   return remote_elem;
108 }
109 
110 PetscInt ZCodeFind(ZCode key, PetscInt n, const ZCode X[])
111 {
112   PetscInt lo = 0, hi = n;
113 
114   if (n == 0) return -1;
115   while (hi - lo > 1) {
116     PetscInt mid = lo + (hi - lo) / 2;
117     if (key < X[mid]) hi = mid;
118     else lo = mid;
119   }
120   return key == X[lo] ? lo : -(lo + (key > X[lo]) + 1);
121 }
122 
123 static PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(DM dm, const ZLayout *layout, const ZCode *vert_z, PetscSegBuffer per_faces, const DMBoundaryType *periodicity, PetscSegBuffer donor_face_closure, PetscSegBuffer my_donor_faces)
124 {
125   MPI_Comm     comm;
126   size_t       num_faces;
127   PetscInt     dim, *faces, vStart, vEnd;
128   PetscMPIInt  size;
129   ZCode       *donor_verts, *donor_minz;
130   PetscSFNode *leaf;
131 
132   PetscFunctionBegin;
133   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
134   PetscCallMPI(MPI_Comm_size(comm, &size));
135   PetscCall(DMGetDimension(dm, &dim));
136   const PetscInt csize = PetscPowInt(2, dim - 1);
137   PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
138   PetscCall(PetscSegBufferGetSize(per_faces, &num_faces));
139   PetscCall(PetscSegBufferExtractInPlace(per_faces, &faces));
140   PetscCall(PetscSegBufferExtractInPlace(donor_face_closure, &donor_verts));
141   PetscCall(PetscMalloc1(num_faces, &donor_minz));
142   PetscCall(PetscMalloc1(num_faces, &leaf));
143   for (PetscInt i = 0; i < (PetscInt)num_faces; i++) {
144     ZCode minz = donor_verts[i * csize];
145     for (PetscInt j = 1; j < csize; j++) minz = PetscMin(minz, donor_verts[i * csize + j]);
146     donor_minz[i] = minz;
147   }
148   {
149     PetscBool sorted;
150     PetscCall(PetscSortedInt64(num_faces, (const PetscInt64 *)donor_minz, &sorted));
151     PetscCheck(sorted, comm, PETSC_ERR_PLIB, "minz not sorted; periodicity in multiple dimensions not yet supported");
152   }
153   for (PetscInt i = 0; i < (PetscInt)num_faces;) {
154     ZCode    z           = donor_minz[i];
155     PetscInt remote_rank = ZCodeFind(z, size + 1, layout->zstarts), remote_count = 0;
156     if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
157     // Process all the vertices on this rank
158     for (ZCode rz = layout->zstarts[remote_rank]; rz < layout->zstarts[remote_rank + 1]; rz++) {
159       Ijk loc = ZCodeSplit(rz);
160       if (rz == z) {
161         leaf[i].rank  = remote_rank;
162         leaf[i].index = remote_count;
163         i++;
164         if (i == (PetscInt)num_faces) break;
165         z = donor_minz[i];
166       }
167       if (IjkActive(layout->vextent, loc)) remote_count++;
168     }
169   }
170   PetscCall(PetscFree(donor_minz));
171   PetscSF sfper;
172   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &sfper));
173   PetscCall(PetscSFSetGraph(sfper, vEnd - vStart, num_faces, PETSC_NULL, PETSC_USE_POINTER, leaf, PETSC_USE_POINTER));
174   const PetscInt *my_donor_degree;
175   PetscCall(PetscSFComputeDegreeBegin(sfper, &my_donor_degree));
176   PetscCall(PetscSFComputeDegreeEnd(sfper, &my_donor_degree));
177   PetscInt num_multiroots = 0;
178   for (PetscInt i = 0; i < vEnd - vStart; i++) {
179     num_multiroots += my_donor_degree[i];
180     if (my_donor_degree[i] == 0) continue;
181     PetscAssert(my_donor_degree[i] == 1, comm, PETSC_ERR_SUP, "Local vertex has multiple faces");
182   }
183   PetscInt *my_donors, *donor_indices, *my_donor_indices;
184   size_t    num_my_donors;
185   PetscCall(PetscSegBufferGetSize(my_donor_faces, &num_my_donors));
186   PetscCheck((PetscInt)num_my_donors == num_multiroots, PETSC_COMM_SELF, PETSC_ERR_SUP, "Donor request does not match expected donors");
187   PetscCall(PetscSegBufferExtractInPlace(my_donor_faces, &my_donors));
188   PetscCall(PetscMalloc1(vEnd - vStart, &my_donor_indices));
189   for (PetscInt i = 0; i < (PetscInt)num_my_donors; i++) {
190     PetscInt f = my_donors[i];
191     PetscInt num_points, *points = NULL, minv = PETSC_MAX_INT;
192     PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
193     for (PetscInt j = 0; j < num_points; j++) {
194       PetscInt p = points[2 * j];
195       if (p < vStart || vEnd <= p) continue;
196       minv = PetscMin(minv, p);
197     }
198     PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
199     PetscAssert(my_donor_degree[minv - vStart] == 1, comm, PETSC_ERR_SUP, "Local vertex not requested");
200     my_donor_indices[minv - vStart] = f;
201   }
202   PetscCall(PetscMalloc1(num_faces, &donor_indices));
203   PetscCall(PetscSFBcastBegin(sfper, MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
204   PetscCall(PetscSFBcastEnd(sfper, MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
205   PetscCall(PetscFree(my_donor_indices));
206   // Modify our leafs so they point to donor faces instead of donor minz. Additionally, give them indices as faces.
207   for (PetscInt i = 0; i < (PetscInt)num_faces; i++) leaf[i].index = donor_indices[i];
208   PetscCall(PetscFree(donor_indices));
209   PetscInt pStart, pEnd;
210   PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
211   PetscCall(PetscSFSetGraph(sfper, pEnd - pStart, num_faces, faces, PETSC_COPY_VALUES, leaf, PETSC_OWN_POINTER));
212   PetscCall(PetscObjectSetName((PetscObject)sfper, "Periodic Faces"));
213   PetscCall(PetscSFViewFromOptions(sfper, NULL, "-sfper_view"));
214 
215   PetscCall(DMPlexSetPeriodicFaceSF(dm, sfper));
216 
217   PetscScalar t[4][4] = {{0}};
218   t[0][0]             = 1;
219   t[1][1]             = 1;
220   t[2][2]             = 1;
221   t[3][3]             = 1;
222   for (PetscInt i = 0; i < dim; i++)
223     if (periodicity[i] == DM_BOUNDARY_PERIODIC) t[i][3] = 1;
224   PetscCall(DMPlexSetPeriodicFaceTransform(dm, t));
225   PetscCall(PetscSFDestroy(&sfper));
226   PetscFunctionReturn(0);
227 }
228 
229 static PetscErrorCode DMCoordAddPeriodicOffsets_Private(DM dm, Vec g, InsertMode mode, Vec l, void *ctx)
230 {
231   PetscFunctionBegin;
232   PetscCall(VecScatterBegin(dm->periodic.affine_to_local, dm->periodic.affine, l, ADD_VALUES, SCATTER_FORWARD));
233   PetscCall(VecScatterEnd(dm->periodic.affine_to_local, dm->periodic.affine, l, ADD_VALUES, SCATTER_FORWARD));
234   PetscFunctionReturn(0);
235 }
236 
237 // Start with an SF for a positive depth (e.g., faces) and create a new SF for matched closure.
238 //
239 // While the image face and corresponding donor face might not have the same orientation, it is assumed that the vertex
240 // numbering is consistent.
241 static PetscErrorCode DMPlexSFCreateClosureSF_Private(DM dm, PetscSF face_sf, PetscSF *closure_sf, IS *is_points)
242 {
243   MPI_Comm           comm;
244   PetscInt           nroots, nleaves, npoints;
245   const PetscInt    *filocal, *pilocal;
246   const PetscSFNode *firemote, *piremote;
247   PetscMPIInt        rank;
248   PetscSF            point_sf;
249 
250   PetscFunctionBegin;
251   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
252   PetscCallMPI(MPI_Comm_rank(comm, &rank));
253   PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, &firemote));
254   PetscCall(DMGetPointSF(dm, &point_sf)); // Point SF has remote points
255   PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, &pilocal, &piremote));
256   PetscInt *rootdata, *leafdata;
257   PetscCall(PetscCalloc2(2 * nroots, &rootdata, 2 * nroots, &leafdata));
258   for (PetscInt i = 0; i < nleaves; i++) {
259     PetscInt point = filocal[i], cl_size, *closure = NULL;
260     PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
261     leafdata[point] = cl_size - 1;
262     PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
263   }
264   PetscCall(PetscSFReduceBegin(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
265   PetscCall(PetscSFReduceEnd(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
266 
267   PetscInt root_offset = 0;
268   for (PetscInt p = 0; p < nroots; p++) {
269     const PetscInt *donor_dof = rootdata + nroots;
270     if (donor_dof[p] == 0) {
271       rootdata[2 * p]     = -1;
272       rootdata[2 * p + 1] = -1;
273       continue;
274     }
275     PetscInt  cl_size;
276     PetscInt *closure = NULL;
277     PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
278     // cl_size - 1 = points not including self
279     PetscAssert(donor_dof[p] == cl_size - 1, comm, PETSC_ERR_PLIB, "Reduced leaf cone sizes do not match root cone sizes");
280     rootdata[2 * p]     = root_offset;
281     rootdata[2 * p + 1] = cl_size - 1;
282     root_offset += cl_size - 1;
283     PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
284   }
285   PetscCall(PetscSFBcastBegin(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
286   PetscCall(PetscSFBcastEnd(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
287   // Count how many leaves we need to communicate the closures
288   PetscInt leaf_offset = 0;
289   for (PetscInt i = 0; i < nleaves; i++) {
290     PetscInt point = filocal[i];
291     if (leafdata[2 * point + 1] < 0) continue;
292     leaf_offset += leafdata[2 * point + 1];
293   }
294 
295   PetscSFNode *closure_leaf;
296   PetscCall(PetscMalloc1(leaf_offset, &closure_leaf));
297   leaf_offset = 0;
298   for (PetscInt i = 0; i < nleaves; i++) {
299     PetscInt point   = filocal[i];
300     PetscInt cl_size = leafdata[2 * point + 1];
301     if (cl_size < 0) continue;
302     for (PetscInt j = 0; j < cl_size; j++) {
303       closure_leaf[leaf_offset].rank  = firemote[i].rank;
304       closure_leaf[leaf_offset].index = leafdata[2 * point] + j;
305       leaf_offset++;
306     }
307   }
308 
309   PetscSF sf_closure;
310   PetscCall(PetscSFCreate(comm, &sf_closure));
311   PetscCall(PetscSFSetGraph(sf_closure, root_offset, leaf_offset, NULL, PETSC_USE_POINTER, closure_leaf, PETSC_OWN_POINTER));
312 
313   // Pack root buffer with owner for every point in the root cones
314   PetscSFNode *donor_closure;
315   PetscCall(PetscCalloc1(root_offset, &donor_closure));
316   root_offset = 0;
317   for (PetscInt p = 0; p < nroots; p++) {
318     if (rootdata[2 * p] < 0) continue;
319     PetscInt  cl_size;
320     PetscInt *closure = NULL;
321     PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
322     for (PetscInt j = 1; j < cl_size; j++) {
323       PetscInt c = closure[2 * j];
324       if (pilocal) {
325         PetscInt found = -1;
326         if (npoints > 0) PetscCall(PetscFindInt(c, npoints, pilocal, &found));
327         if (found >= 0) {
328           donor_closure[root_offset++] = piremote[found];
329           continue;
330         }
331       }
332       // we own c
333       donor_closure[root_offset].rank  = rank;
334       donor_closure[root_offset].index = c;
335       root_offset++;
336     }
337     PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
338   }
339 
340   PetscSFNode *leaf_donor_closure;
341   PetscCall(PetscMalloc1(leaf_offset, &leaf_donor_closure));
342   PetscCall(PetscSFBcastBegin(sf_closure, MPIU_2INT, donor_closure, leaf_donor_closure, MPI_REPLACE));
343   PetscCall(PetscSFBcastEnd(sf_closure, MPIU_2INT, donor_closure, leaf_donor_closure, MPI_REPLACE));
344   PetscCall(PetscSFDestroy(&sf_closure));
345   PetscCall(PetscFree(donor_closure));
346 
347   PetscSFNode *new_iremote;
348   PetscCall(PetscCalloc1(nroots, &new_iremote));
349   for (PetscInt i = 0; i < nroots; i++) new_iremote[i].rank = -1;
350   // Walk leaves and match vertices
351   leaf_offset = 0;
352   for (PetscInt i = 0; i < nleaves; i++) {
353     PetscInt  point   = filocal[i], cl_size;
354     PetscInt *closure = NULL;
355     PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
356     for (PetscInt j = 1; j < cl_size; j++) { // TODO: should we send donor edge orientations so we can flip for consistency?
357       PetscInt    c  = closure[2 * j];
358       PetscSFNode lc = leaf_donor_closure[leaf_offset];
359       // printf("[%d] face %d.%d: %d ?-- (%d,%d)\n", rank, point, j, c, lc.rank, lc.index);
360       if (new_iremote[c].rank == -1) {
361         new_iremote[c] = lc;
362       } else PetscCheck(new_iremote[c].rank == lc.rank && new_iremote[c].index == lc.index, comm, PETSC_ERR_PLIB, "Mismatched cone ordering between faces");
363       leaf_offset++;
364     }
365     PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
366   }
367   PetscCall(PetscFree(leaf_donor_closure));
368 
369   // Include face points in closure SF
370   for (PetscInt i = 0; i < nleaves; i++) new_iremote[filocal[i]] = firemote[i];
371   // consolidate leaves
372   PetscInt num_new_leaves = 0;
373   for (PetscInt i = 0; i < nroots; i++) {
374     if (new_iremote[i].rank == -1) continue;
375     new_iremote[num_new_leaves] = new_iremote[i];
376     leafdata[num_new_leaves]    = i;
377     num_new_leaves++;
378   }
379   PetscCall(ISCreateGeneral(comm, num_new_leaves, leafdata, PETSC_COPY_VALUES, is_points));
380 
381   PetscSF csf;
382   PetscCall(PetscSFCreate(comm, &csf));
383   PetscCall(PetscSFSetGraph(csf, nroots, num_new_leaves, leafdata, PETSC_COPY_VALUES, new_iremote, PETSC_COPY_VALUES));
384   PetscCall(PetscFree(new_iremote)); // copy and delete because new_iremote is longer than it needs to be
385   PetscCall(PetscFree2(rootdata, leafdata));
386 
387   // TODO: this is a lie; it's only the periodic point SF; need to compose with standard point SF
388   PetscCall(PetscObjectSetName((PetscObject)csf, "Composed Periodic Points"));
389   PetscSFViewFromOptions(csf, NULL, "-csf_view");
390   *closure_sf = csf;
391   PetscFunctionReturn(0);
392 }
393 
394 static PetscErrorCode DMGetPointSFComposed_Plex(DM dm, PetscSF *sf)
395 {
396   DM_Plex *plex = (DM_Plex *)dm->data;
397 
398   PetscFunctionBegin;
399   if (!plex->periodic.composed_sf) {
400     PetscSF face_sf = plex->periodic.face_sf;
401 
402     PetscCall(DMPlexSFCreateClosureSF_Private(dm, face_sf, &plex->periodic.composed_sf, &plex->periodic.periodic_points));
403   }
404   if (sf) *sf = plex->periodic.composed_sf;
405   PetscFunctionReturn(0);
406 }
407 
408 PetscErrorCode DMPeriodicCoordinateSetUp_Internal(DM dm)
409 {
410   DM_Plex *plex = (DM_Plex *)dm->data;
411   PetscFunctionBegin;
412   if (!plex->periodic.face_sf) PetscFunctionReturn(0);
413   PetscCall(DMGetPointSFComposed_Plex(dm, NULL));
414   PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetPointSFComposed_C", DMGetPointSFComposed_Plex));
415 
416   PetscInt dim;
417   PetscCall(DMGetDimension(dm, &dim));
418   size_t count;
419   IS     isdof;
420   {
421     PetscInt        npoints;
422     const PetscInt *points;
423     IS              is = plex->periodic.periodic_points;
424     PetscSegBuffer  seg;
425     PetscSection    section;
426     PetscCall(DMGetLocalSection(dm, &section));
427     PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 32, &seg));
428     PetscCall(ISGetSize(is, &npoints));
429     PetscCall(ISGetIndices(is, &points));
430     for (PetscInt i = 0; i < npoints; i++) {
431       PetscInt point = points[i], off, dof;
432       PetscCall(PetscSectionGetOffset(section, point, &off));
433       PetscCall(PetscSectionGetDof(section, point, &dof));
434       PetscAssert(dof % dim == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected dof %" PetscInt_FMT " not divisible by dimension %" PetscInt_FMT, dof, dim);
435       for (PetscInt j = 0; j < dof / dim; j++) {
436         PetscInt *slot;
437         PetscCall(PetscSegBufferGetInts(seg, 1, &slot));
438         *slot = off / dim;
439       }
440     }
441     PetscInt *ind;
442     PetscCall(PetscSegBufferGetSize(seg, &count));
443     PetscCall(PetscSegBufferExtractAlloc(seg, &ind));
444     PetscCall(PetscSegBufferDestroy(&seg));
445     PetscCall(ISCreateBlock(PETSC_COMM_SELF, dim, count, ind, PETSC_OWN_POINTER, &isdof));
446   }
447   Vec        L, P;
448   VecType    vec_type;
449   VecScatter scatter;
450   PetscCall(DMGetLocalVector(dm, &L));
451   PetscCall(VecCreate(PETSC_COMM_SELF, &P));
452   PetscCall(VecSetSizes(P, count * dim, count * dim));
453   PetscCall(VecGetType(L, &vec_type));
454   PetscCall(VecSetType(P, vec_type));
455   PetscCall(VecScatterCreate(P, NULL, L, isdof, &scatter));
456   PetscCall(DMRestoreLocalVector(dm, &L));
457   PetscCall(ISDestroy(&isdof));
458 
459   {
460     PetscScalar *x;
461     PetscCall(VecGetArrayWrite(P, &x));
462     for (PetscInt i = 0; i < (PetscInt)count; i++) {
463       for (PetscInt j = 0; j < dim; j++) x[i * dim + j] = plex->periodic.transform[j][3];
464     }
465     PetscCall(VecRestoreArrayWrite(P, &x));
466   }
467 
468   dm->periodic.affine_to_local = scatter;
469   dm->periodic.affine          = P;
470   PetscCall(DMGlobalToLocalHookAdd(dm, NULL, DMCoordAddPeriodicOffsets_Private, NULL));
471   PetscFunctionReturn(0);
472 }
473 
474 PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Internal(DM dm, PetscInt dim, const PetscInt faces[], const PetscReal lower[], const PetscReal upper[], const DMBoundaryType periodicity[], PetscBool interpolate)
475 {
476   PetscInt  eextent[3] = {1, 1, 1}, vextent[3] = {1, 1, 1};
477   const Ijk closure_1[] = {
478     {0, 0, 0},
479     {1, 0, 0},
480   };
481   const Ijk closure_2[] = {
482     {0, 0, 0},
483     {1, 0, 0},
484     {1, 1, 0},
485     {0, 1, 0},
486   };
487   const Ijk closure_3[] = {
488     {0, 0, 0},
489     {0, 1, 0},
490     {1, 1, 0},
491     {1, 0, 0},
492     {0, 0, 1},
493     {1, 0, 1},
494     {1, 1, 1},
495     {0, 1, 1},
496   };
497   const Ijk *const closure_dim[] = {NULL, closure_1, closure_2, closure_3};
498   // This must be kept consistent with DMPlexCreateCubeMesh_Internal
499   const PetscInt        face_marker_1[]   = {1, 2};
500   const PetscInt        face_marker_2[]   = {4, 2, 1, 3};
501   const PetscInt        face_marker_3[]   = {6, 5, 3, 4, 1, 2};
502   const PetscInt *const face_marker_dim[] = {NULL, face_marker_1, face_marker_2, face_marker_3};
503   // Orient faces so the normal is in the positive axis and the first vertex is the one closest to zero.
504   // These orientations can be determined by examining cones of a reference quad and hex element.
505   const PetscInt        face_orient_1[]   = {0, 0};
506   const PetscInt        face_orient_2[]   = {-1, 0, 0, -1};
507   const PetscInt        face_orient_3[]   = {-2, 0, -2, 1, -2, 0};
508   const PetscInt *const face_orient_dim[] = {NULL, face_orient_1, face_orient_2, face_orient_3};
509 
510   PetscFunctionBegin;
511   PetscValidPointer(dm, 1);
512   PetscValidLogicalCollectiveInt(dm, dim, 2);
513   PetscCall(DMSetDimension(dm, dim));
514   PetscMPIInt rank, size;
515   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size));
516   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
517   for (PetscInt i = 0; i < dim; i++) {
518     eextent[i] = faces[i];
519     vextent[i] = faces[i] + 1;
520   }
521   ZLayout layout;
522   PetscCall(ZLayoutCreate(size, eextent, vextent, &layout));
523   PetscZSet vset; // set of all vertices in the closure of the owned elements
524   PetscCall(PetscZSetCreate(&vset));
525   PetscInt local_elems = 0;
526   for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
527     Ijk loc = ZCodeSplit(z);
528     if (IjkActive(layout.vextent, loc)) PetscZSetAdd(vset, z);
529     if (IjkActive(layout.eextent, loc)) {
530       local_elems++;
531       // Add all neighboring vertices to set
532       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
533         Ijk   inc  = closure_dim[dim][n];
534         Ijk   nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
535         ZCode v    = ZEncode(nloc);
536         PetscZSetAdd(vset, v);
537       }
538     }
539   }
540   PetscInt local_verts, off = 0;
541   ZCode   *vert_z;
542   PetscCall(PetscZSetGetSize(vset, &local_verts));
543   PetscCall(PetscMalloc1(local_verts, &vert_z));
544   PetscCall(PetscZSetGetElems(vset, &off, vert_z));
545   PetscCall(PetscZSetDestroy(&vset));
546   // ZCode is unsigned for bitwise convenience, but highest bit should never be set, so can interpret as signed
547   PetscCall(PetscSortInt64(local_verts, (PetscInt64 *)vert_z));
548 
549   PetscCall(DMPlexSetChart(dm, 0, local_elems + local_verts));
550   for (PetscInt e = 0; e < local_elems; e++) PetscCall(DMPlexSetConeSize(dm, e, PetscPowInt(2, dim)));
551   PetscCall(DMSetUp(dm));
552   {
553     PetscInt e = 0;
554     for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
555       Ijk loc = ZCodeSplit(z);
556       if (!IjkActive(layout.eextent, loc)) continue;
557       PetscInt cone[8], orient[8] = {0};
558       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
559         Ijk      inc  = closure_dim[dim][n];
560         Ijk      nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
561         ZCode    v    = ZEncode(nloc);
562         PetscInt ci   = ZCodeFind(v, local_verts, vert_z);
563         PetscAssert(ci >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Did not find neighbor vertex in set");
564         cone[n] = local_elems + ci;
565       }
566       PetscCall(DMPlexSetCone(dm, e, cone));
567       PetscCall(DMPlexSetConeOrientation(dm, e, orient));
568       e++;
569     }
570   }
571 
572   PetscCall(DMPlexSymmetrize(dm));
573   PetscCall(DMPlexStratify(dm));
574 
575   { // Create point SF
576     PetscSF sf;
577     PetscSFCreate(PetscObjectComm((PetscObject)dm), &sf);
578     PetscInt owned_verts = ZCodeFind(layout.zstarts[rank + 1], local_verts, vert_z);
579     if (owned_verts < 0) owned_verts = -(owned_verts + 1); // We don't care whether the key was found
580     PetscInt     num_ghosts = local_verts - owned_verts;   // Due to sorting, owned vertices always come first
581     PetscInt    *local_ghosts;
582     PetscSFNode *ghosts;
583     PetscCall(PetscMalloc1(num_ghosts, &local_ghosts));
584     PetscCall(PetscMalloc1(num_ghosts, &ghosts));
585     for (PetscInt i = 0; i < num_ghosts;) {
586       ZCode    z           = vert_z[owned_verts + i];
587       PetscInt remote_rank = ZCodeFind(z, size + 1, layout.zstarts), remote_count = 0;
588       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
589       // We have a new remote rank; find all the ghost indices (which are contiguous in vert_z)
590 
591       // Count the elements on remote_rank
592       PetscInt remote_elem = ZLayoutElementsOnRank(&layout, remote_rank);
593 
594       // Traverse vertices and make ghost links
595       for (ZCode rz = layout.zstarts[remote_rank]; rz < layout.zstarts[remote_rank + 1]; rz++) {
596         Ijk loc = ZCodeSplit(rz);
597         if (rz == z) {
598           local_ghosts[i] = local_elems + owned_verts + i;
599           ghosts[i].rank  = remote_rank;
600           ghosts[i].index = remote_elem + remote_count;
601           i++;
602           if (i == num_ghosts) break;
603           z = vert_z[owned_verts + i];
604         }
605         if (IjkActive(layout.vextent, loc)) remote_count++;
606       }
607     }
608     PetscCall(PetscSFSetGraph(sf, local_elems + local_verts, num_ghosts, local_ghosts, PETSC_OWN_POINTER, ghosts, PETSC_OWN_POINTER));
609     PetscCall(PetscObjectSetName((PetscObject)sf, "SFC Point SF"));
610     PetscCall(DMSetPointSF(dm, sf));
611     PetscCall(PetscSFDestroy(&sf));
612   }
613   {
614     Vec          coordinates;
615     PetscScalar *coords;
616     PetscSection coord_section;
617     PetscInt     coord_size;
618     PetscCall(DMGetCoordinateSection(dm, &coord_section));
619     PetscCall(PetscSectionSetNumFields(coord_section, 1));
620     PetscCall(PetscSectionSetFieldComponents(coord_section, 0, dim));
621     PetscCall(PetscSectionSetChart(coord_section, local_elems, local_elems + local_verts));
622     for (PetscInt v = 0; v < local_verts; v++) {
623       PetscInt point = local_elems + v;
624       PetscCall(PetscSectionSetDof(coord_section, point, dim));
625       PetscCall(PetscSectionSetFieldDof(coord_section, point, 0, dim));
626     }
627     PetscCall(PetscSectionSetUp(coord_section));
628     PetscCall(PetscSectionGetStorageSize(coord_section, &coord_size));
629     PetscCall(VecCreate(PETSC_COMM_SELF, &coordinates));
630     PetscCall(PetscObjectSetName((PetscObject)coordinates, "coordinates"));
631     PetscCall(VecSetSizes(coordinates, coord_size, PETSC_DETERMINE));
632     PetscCall(VecSetBlockSize(coordinates, dim));
633     PetscCall(VecSetType(coordinates, VECSTANDARD));
634     PetscCall(VecGetArray(coordinates, &coords));
635     for (PetscInt v = 0; v < local_verts; v++) {
636       Ijk loc             = ZCodeSplit(vert_z[v]);
637       coords[v * dim + 0] = lower[0] + loc.i * (upper[0] - lower[0]) / layout.eextent.i;
638       if (dim > 1) coords[v * dim + 1] = lower[1] + loc.j * (upper[1] - lower[1]) / layout.eextent.j;
639       if (dim > 2) coords[v * dim + 2] = lower[2] + loc.k * (upper[2] - lower[2]) / layout.eextent.k;
640     }
641     PetscCall(VecRestoreArray(coordinates, &coords));
642     PetscCall(DMSetCoordinatesLocal(dm, coordinates));
643     PetscCall(VecDestroy(&coordinates));
644   }
645   if (interpolate) {
646     PetscCall(DMPlexInterpolateInPlace_Internal(dm));
647 
648     DMLabel label;
649     PetscCall(DMCreateLabel(dm, "Face Sets"));
650     PetscCall(DMGetLabel(dm, "Face Sets", &label));
651     PetscSegBuffer per_faces, donor_face_closure, my_donor_faces;
652     PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &per_faces));
653     PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &my_donor_faces));
654     PetscCall(PetscSegBufferCreate(sizeof(ZCode), 64 * PetscPowInt(2, dim), &donor_face_closure));
655     PetscInt fStart, fEnd, vStart, vEnd;
656     PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
657     PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
658     for (PetscInt f = fStart; f < fEnd; f++) {
659       PetscInt npoints, *points = NULL, num_fverts = 0, fverts[8];
660       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
661       PetscInt bc_count[6] = {0};
662       for (PetscInt i = 0; i < npoints; i++) {
663         PetscInt p = points[2 * i];
664         if (p < vStart || vEnd <= p) continue;
665         fverts[num_fverts++] = p;
666         Ijk loc              = ZCodeSplit(vert_z[p - vStart]);
667         // Convention here matches DMPlexCreateCubeMesh_Internal
668         bc_count[0] += loc.i == 0;
669         bc_count[1] += loc.i == layout.vextent.i - 1;
670         bc_count[2] += loc.j == 0;
671         bc_count[3] += loc.j == layout.vextent.j - 1;
672         bc_count[4] += loc.k == 0;
673         bc_count[5] += loc.k == layout.vextent.k - 1;
674       }
675       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
676       for (PetscInt bc = 0, bc_match = 0; bc < 2 * dim; bc++) {
677         if (bc_count[bc] == PetscPowInt(2, dim - 1)) {
678           PetscCall(DMPlexOrientPoint(dm, f, face_orient_dim[dim][bc]));
679           if (periodicity[bc / 2] == DM_BOUNDARY_PERIODIC) {
680             PetscInt *put;
681             if (bc % 2 == 0) { // donor face; no label
682               PetscCall(PetscSegBufferGet(my_donor_faces, 1, &put));
683               *put = f;
684             } else { // periodic face
685               PetscCall(PetscSegBufferGet(per_faces, 1, &put));
686               *put = f;
687               ZCode *zput;
688               PetscCall(PetscSegBufferGet(donor_face_closure, num_fverts, &zput));
689               for (PetscInt i = 0; i < num_fverts; i++) {
690                 Ijk loc = ZCodeSplit(vert_z[fverts[i] - vStart]);
691                 switch (bc / 2) {
692                 case 0:
693                   loc.i = 0;
694                   break;
695                 case 1:
696                   loc.j = 0;
697                   break;
698                 case 2:
699                   loc.k = 0;
700                   break;
701                 }
702                 *zput++ = ZEncode(loc);
703               }
704             }
705             continue;
706           }
707           PetscAssert(bc_match == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Face matches multiple face sets");
708           PetscCall(DMLabelSetValue(label, f, face_marker_dim[dim][bc]));
709           bc_match++;
710         }
711       }
712     }
713     // Ensure that the Coordinate DM has our new boundary labels
714     DM cdm;
715     PetscCall(DMGetCoordinateDM(dm, &cdm));
716     PetscCall(DMCopyLabels(dm, cdm, PETSC_COPY_VALUES, PETSC_FALSE, DM_COPY_LABELS_FAIL));
717     if (periodicity[0] == DM_BOUNDARY_PERIODIC || (dim > 1 && periodicity[1] == DM_BOUNDARY_PERIODIC) || (dim > 2 && periodicity[2] == DM_BOUNDARY_PERIODIC)) {
718       PetscCall(DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(dm, &layout, vert_z, per_faces, periodicity, donor_face_closure, my_donor_faces));
719       PetscSF sfper;
720       PetscCall(DMPlexGetPeriodicFaceSF(dm, &sfper));
721       PetscCall(DMPlexSetPeriodicFaceSF(cdm, sfper));
722       cdm->periodic.setup = DMPeriodicCoordinateSetUp_Internal;
723     }
724     PetscCall(PetscSegBufferDestroy(&per_faces));
725     PetscCall(PetscSegBufferDestroy(&donor_face_closure));
726     PetscCall(PetscSegBufferDestroy(&my_donor_faces));
727   }
728   PetscCall(PetscFree(layout.zstarts));
729   PetscCall(PetscFree(vert_z));
730   PetscFunctionReturn(0);
731 }
732 
733 /*@
734   DMPlexSetPeriodicFaceSF - Express periodicity from an existing mesh
735 
736   Logically collective
737 
738   Input Parameters:
739 + dm - The `DMPLEX` on which to set periodicity
740 - face_sf - SF in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.
741 
742   Level: advanced
743 
744   Notes:
745 
746   One can use `-dm_plex_box_sfc` to use this mode of periodicity, wherein the periodic points are distinct both globally
747   and locally, but are paired when creating a global dof space.
748 
749 .seealso: [](chapter_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexGetPeriodicFaceSF()`
750 @*/
751 PetscErrorCode DMPlexSetPeriodicFaceSF(DM dm, PetscSF face_sf)
752 {
753   DM_Plex *plex = (DM_Plex *)dm->data;
754   PetscFunctionBegin;
755   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
756   PetscCall(PetscObjectReference((PetscObject)face_sf));
757   PetscCall(PetscSFDestroy(&plex->periodic.face_sf));
758   plex->periodic.face_sf = face_sf;
759   PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetPointSFComposed_C", DMGetPointSFComposed_Plex));
760   PetscFunctionReturn(0);
761 }
762 
763 /*@
764   DMPlexGetPeriodicFaceSF - Obtain periodicity for a mesh
765 
766   Logically collective
767 
768   Input Parameters:
769 . dm - The `DMPLEX` for which to obtain periodic relation
770 
771   Output Parameters:
772 . face_sf - SF in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.
773 
774   Level: advanced
775 
776 .seealso: [](chapter_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetPeriodicFaceSF()`
777 @*/
778 PetscErrorCode DMPlexGetPeriodicFaceSF(DM dm, PetscSF *face_sf)
779 {
780   DM_Plex *plex = (DM_Plex *)dm->data;
781   PetscFunctionBegin;
782   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
783   *face_sf = plex->periodic.face_sf;
784   PetscFunctionReturn(0);
785 }
786 
787 /*@
788   DMPlexSetPeriodicFaceTransform - set geometric transform from donor to periodic points
789 
790   Logically Collective
791 
792   Input Arguments:
793 + dm - `DMPlex` that has been configured with `DMPlexSetPeriodicFaceSF()`
794 - t - 4x4 affine transformation basis.
795 
796 @*/
797 PetscErrorCode DMPlexSetPeriodicFaceTransform(DM dm, const PetscScalar t[4][4])
798 {
799   DM_Plex *plex = (DM_Plex *)dm->data;
800   PetscFunctionBegin;
801   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
802   for (PetscInt i = 0; i < 4; i++) {
803     for (PetscInt j = 0; j < 4; j++) {
804       PetscCheck(i != j || t[i][j] == 1., PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Rotated transforms not supported");
805       plex->periodic.transform[i][j] = t[i][j];
806     }
807   }
808   PetscFunctionReturn(0);
809 }
810