1 #include <petsc/private/dmpleximpl.h> /*I "petscdmplex.h" I*/ 2 #include <petscsf.h> 3 #include <petsc/private/hashset.h> 4 5 typedef uint64_t ZCode; 6 7 PETSC_HASH_SET(ZSet, ZCode, PetscHash_UInt64, PetscHashEqual) 8 9 typedef struct { 10 PetscInt i, j, k; 11 } Ijk; 12 13 typedef struct { 14 Ijk eextent; 15 Ijk vextent; 16 PetscMPIInt comm_size; 17 ZCode *zstarts; 18 } ZLayout; 19 20 // Magic numbers taken from https://stackoverflow.com/a/18528775/7564988 21 static unsigned ZCodeSplit1(ZCode z) 22 { 23 z &= 0x1249249249249249; 24 z = (z | z >> 2) & 0x10c30c30c30c30c3; 25 z = (z | z >> 4) & 0x100f00f00f00f00f; 26 z = (z | z >> 8) & 0x1f0000ff0000ff; 27 z = (z | z >> 16) & 0x1f00000000ffff; 28 z = (z | z >> 32) & 0x1fffff; 29 return (unsigned)z; 30 } 31 32 static ZCode ZEncode1(unsigned t) 33 { 34 ZCode z = t; 35 z &= 0x1fffff; 36 z = (z | z << 32) & 0x1f00000000ffff; 37 z = (z | z << 16) & 0x1f0000ff0000ff; 38 z = (z | z << 8) & 0x100f00f00f00f00f; 39 z = (z | z << 4) & 0x10c30c30c30c30c3; 40 z = (z | z << 2) & 0x1249249249249249; 41 return z; 42 } 43 44 static Ijk ZCodeSplit(ZCode z) 45 { 46 Ijk c; 47 c.i = ZCodeSplit1(z >> 2); 48 c.j = ZCodeSplit1(z >> 1); 49 c.k = ZCodeSplit1(z >> 0); 50 return c; 51 } 52 53 static ZCode ZEncode(Ijk c) 54 { 55 ZCode z = (ZEncode1((unsigned int)c.i) << 2) | (ZEncode1((unsigned int)c.j) << 1) | ZEncode1((unsigned int)c.k); 56 return z; 57 } 58 59 static PetscBool IjkActive(Ijk extent, Ijk l) 60 { 61 if (l.i < extent.i && l.j < extent.j && l.k < extent.k) return PETSC_TRUE; 62 return PETSC_FALSE; 63 } 64 65 // If z is not the base of an octet (last 3 bits 0), return 0. 66 // 67 // If z is the base of an octet, we recursively grow to the biggest structured octet. This is typically useful when a z 68 // is outside the domain and we wish to skip a (possibly recursively large) octet to find our next interesting point. 69 static ZCode ZStepOct(ZCode z) 70 { 71 if (PetscUnlikely(z == 0)) return 0; // Infinite loop below if z == 0 72 ZCode step = 07; 73 for (; (z & step) == 0; step = (step << 3) | 07) { } 74 return step >> 3; 75 } 76 77 // Since element/vertex box extents are typically not equal powers of 2, Z codes that lie within the domain are not contiguous. 78 static PetscErrorCode ZLayoutCreate(PetscMPIInt size, const PetscInt eextent[3], const PetscInt vextent[3], ZLayout *layout) 79 { 80 PetscFunctionBegin; 81 layout->eextent.i = eextent[0]; 82 layout->eextent.j = eextent[1]; 83 layout->eextent.k = eextent[2]; 84 layout->vextent.i = vextent[0]; 85 layout->vextent.j = vextent[1]; 86 layout->vextent.k = vextent[2]; 87 layout->comm_size = size; 88 layout->zstarts = NULL; 89 PetscCall(PetscMalloc1(size + 1, &layout->zstarts)); 90 91 PetscInt total_elems = eextent[0] * eextent[1] * eextent[2]; 92 ZCode z = 0; 93 layout->zstarts[0] = 0; 94 // This loop traverses all vertices in the global domain, so is worth making fast. We use ZStepBound 95 for (PetscMPIInt r = 0; r < size; r++) { 96 PetscInt elems_needed = (total_elems / size) + (total_elems % size > r), count; 97 for (count = 0; count < elems_needed; z++) { 98 ZCode skip = ZStepOct(z); // optimistically attempt a longer step 99 for (ZCode s = skip;; s >>= 3) { 100 Ijk trial = ZCodeSplit(z + s); 101 if (IjkActive(layout->eextent, trial)) { 102 while (count + s + 1 > (ZCode)elems_needed) s >>= 3; // Shrink the octet 103 count += s + 1; 104 z += s; 105 break; 106 } 107 if (s == 0) { // the whole skip octet is inactive 108 z += skip; 109 break; 110 } 111 } 112 } 113 // Pick up any extra vertices in the Z ordering before the next rank's first owned element. 114 // 115 // This leads to poorly balanced vertices when eextent is a power of 2, since all the fringe vertices end up 116 // on the last rank. A possible solution is to balance the Z-order vertices independently from the cells, which will 117 // result in a lot of element closures being remote. We could finish marking boundary conditions, then do a round of 118 // vertex ownership smoothing (which would reorder and redistribute vertices without touching element distribution). 119 // Another would be to have an analytic ownership criteria for vertices in the fringe veextent - eextent. This would 120 // complicate the job of identifying an owner and its offset. 121 // 122 // The current recommended approach is to let `-dm_distribute 1` (default) resolve vertex ownership. This is 123 // *mandatory* with isoperiodicity (except in special cases) to remove standed vertices from local spaces. Here's 124 // the issue: 125 // 126 // Consider this partition on rank 0 (left) and rank 1. 127 // 128 // 4 -------- 5 -- 14 --10 -- 21 --11 129 // | | | 130 // 7 -- 16 -- 8 | | | 131 // | | 3 ------- 7 ------- 9 132 // | | | | 133 // 4 -------- 6 ------ 10 | | 134 // | | | 6 -- 16 -- 8 135 // | | | 136 // 3 ---11--- 5 --18-- 9 137 // 138 // The periodic face SF looks like 139 // [0] Number of roots=21, leaves=1, remote ranks=1 140 // [0] 16 <- (0,11) 141 // [1] Number of roots=22, leaves=2, remote ranks=2 142 // [1] 14 <- (0,18) 143 // [1] 21 <- (1,16) 144 // 145 // In handling face (0,16), rank 0 learns that (0,7) and (0,8) map to (0,3) and (0,5) respectively, thus we won't use 146 // the point SF links to (1,4) and (1,5). Rank 1 learns about the periodic mapping of (1,5) while handling face 147 // (1,14), but never learns that vertex (1,4) has been mapped to (0,3) by face (0,16). 148 // 149 // We can relatively easily inform vertex (1,4) of this mapping, but it stays in rank 1's local space despite not 150 // being in the closure and thus not being contributed to. This would be mostly harmless except that some viewer 151 // routines expect all local points to be somehow significant. It is not easy to analytically remove the (1,4) 152 // vertex because the point SF and isoperiodic face SF would need to be updated to account for removal of the 153 // stranded vertices. 154 for (; z <= ZEncode(layout->vextent); z++) { 155 Ijk loc = ZCodeSplit(z); 156 if (IjkActive(layout->eextent, loc)) break; 157 z += ZStepOct(z); 158 } 159 layout->zstarts[r + 1] = z; 160 } 161 layout->zstarts[size] = ZEncode(layout->vextent); 162 PetscFunctionReturn(PETSC_SUCCESS); 163 } 164 165 static PetscInt ZLayoutElementsOnRank(const ZLayout *layout, PetscMPIInt rank) 166 { 167 PetscInt remote_elem = 0; 168 for (ZCode rz = layout->zstarts[rank]; rz < layout->zstarts[rank + 1]; rz++) { 169 Ijk loc = ZCodeSplit(rz); 170 if (IjkActive(layout->eextent, loc)) remote_elem++; 171 else rz += ZStepOct(rz); 172 } 173 return remote_elem; 174 } 175 176 static PetscInt ZCodeFind(ZCode key, PetscInt n, const ZCode X[]) 177 { 178 PetscInt lo = 0, hi = n; 179 180 if (n == 0) return -1; 181 while (hi - lo > 1) { 182 PetscInt mid = lo + (hi - lo) / 2; 183 if (key < X[mid]) hi = mid; 184 else lo = mid; 185 } 186 return key == X[lo] ? lo : -(lo + (key > X[lo]) + 1); 187 } 188 189 static PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(DM dm, const ZLayout *layout, const ZCode *vert_z, PetscSegBuffer per_faces[3], const PetscReal *lower, const PetscReal *upper, const DMBoundaryType *periodicity, PetscSegBuffer donor_face_closure[3], PetscSegBuffer my_donor_faces[3]) 190 { 191 MPI_Comm comm; 192 PetscInt dim, vStart, vEnd; 193 PetscMPIInt size; 194 PetscSF face_sfs[3]; 195 PetscScalar transforms[3][4][4] = {{{0}}}; 196 197 PetscFunctionBegin; 198 PetscCall(PetscObjectGetComm((PetscObject)dm, &comm)); 199 PetscCallMPI(MPI_Comm_size(comm, &size)); 200 PetscCall(DMGetDimension(dm, &dim)); 201 const PetscInt csize = PetscPowInt(2, dim - 1); 202 PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd)); 203 204 PetscInt num_directions = 0; 205 for (PetscInt direction = 0; direction < dim; direction++) { 206 PetscCount num_faces; 207 PetscInt *faces; 208 ZCode *donor_verts, *donor_minz; 209 PetscSFNode *leaf; 210 PetscCount num_multiroots = 0; 211 PetscInt pStart, pEnd; 212 PetscBool sorted; 213 PetscInt inum_faces; 214 215 if (periodicity[direction] != DM_BOUNDARY_PERIODIC) continue; 216 PetscCall(PetscSegBufferGetSize(per_faces[direction], &num_faces)); 217 PetscCall(PetscSegBufferExtractInPlace(per_faces[direction], &faces)); 218 PetscCall(PetscSegBufferExtractInPlace(donor_face_closure[direction], &donor_verts)); 219 PetscCall(PetscMalloc1(num_faces, &donor_minz)); 220 PetscCall(PetscMalloc1(num_faces, &leaf)); 221 for (PetscCount i = 0; i < num_faces; i++) { 222 ZCode minz = donor_verts[i * csize]; 223 224 for (PetscInt j = 1; j < csize; j++) minz = PetscMin(minz, donor_verts[i * csize + j]); 225 donor_minz[i] = minz; 226 } 227 PetscCall(PetscIntCast(num_faces, &inum_faces)); 228 PetscCall(PetscSortedInt64(inum_faces, (const PetscInt64 *)donor_minz, &sorted)); 229 // If a donor vertex were chosen to broker multiple faces, we would have a logic error. 230 // Checking for sorting is a cheap check that there are no duplicates. 231 PetscCheck(sorted, PETSC_COMM_SELF, PETSC_ERR_PLIB, "minz not sorted; possible duplicates not checked"); 232 for (PetscCount i = 0; i < num_faces;) { 233 ZCode z = donor_minz[i]; 234 PetscMPIInt remote_rank = (PetscMPIInt)ZCodeFind(z, size + 1, layout->zstarts), remote_count = 0; 235 236 if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1; 237 // Process all the vertices on this rank 238 for (ZCode rz = layout->zstarts[remote_rank]; rz < layout->zstarts[remote_rank + 1]; rz++) { 239 Ijk loc = ZCodeSplit(rz); 240 241 if (rz == z) { 242 leaf[i].rank = remote_rank; 243 leaf[i].index = remote_count; 244 i++; 245 if (i == num_faces) break; 246 z = donor_minz[i]; 247 } 248 if (IjkActive(layout->vextent, loc)) remote_count++; 249 } 250 } 251 PetscCall(PetscFree(donor_minz)); 252 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &face_sfs[num_directions])); 253 PetscCall(PetscSFSetGraph(face_sfs[num_directions], vEnd - vStart, inum_faces, NULL, PETSC_USE_POINTER, leaf, PETSC_USE_POINTER)); 254 const PetscInt *my_donor_degree; 255 PetscCall(PetscSFComputeDegreeBegin(face_sfs[num_directions], &my_donor_degree)); 256 PetscCall(PetscSFComputeDegreeEnd(face_sfs[num_directions], &my_donor_degree)); 257 258 for (PetscInt i = 0; i < vEnd - vStart; i++) { 259 num_multiroots += my_donor_degree[i]; 260 if (my_donor_degree[i] == 0) continue; 261 PetscAssert(my_donor_degree[i] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex has multiple faces"); 262 } 263 PetscInt *my_donors, *donor_indices, *my_donor_indices; 264 PetscCount num_my_donors; 265 266 PetscCall(PetscSegBufferGetSize(my_donor_faces[direction], &num_my_donors)); 267 PetscCheck(num_my_donors == num_multiroots, PETSC_COMM_SELF, PETSC_ERR_SUP, "Donor request (%" PetscCount_FMT ") does not match expected donors (%" PetscCount_FMT ")", num_my_donors, num_multiroots); 268 PetscCall(PetscSegBufferExtractInPlace(my_donor_faces[direction], &my_donors)); 269 PetscCall(PetscMalloc1(vEnd - vStart, &my_donor_indices)); 270 for (PetscCount i = 0; i < num_my_donors; i++) { 271 PetscInt f = my_donors[i]; 272 PetscInt num_points, *points = NULL, minv = PETSC_INT_MAX; 273 274 PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points)); 275 for (PetscInt j = 0; j < num_points; j++) { 276 PetscInt p = points[2 * j]; 277 if (p < vStart || vEnd <= p) continue; 278 minv = PetscMin(minv, p); 279 } 280 PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points)); 281 PetscAssert(my_donor_degree[minv - vStart] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex not requested"); 282 my_donor_indices[minv - vStart] = f; 283 } 284 PetscCall(PetscMalloc1(num_faces, &donor_indices)); 285 PetscCall(PetscSFBcastBegin(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE)); 286 PetscCall(PetscSFBcastEnd(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE)); 287 PetscCall(PetscFree(my_donor_indices)); 288 // Modify our leafs so they point to donor faces instead of donor minz. Additionally, give them indices as faces. 289 for (PetscCount i = 0; i < num_faces; i++) leaf[i].index = donor_indices[i]; 290 PetscCall(PetscFree(donor_indices)); 291 PetscCall(DMPlexGetChart(dm, &pStart, &pEnd)); 292 PetscCall(PetscSFSetGraph(face_sfs[num_directions], pEnd - pStart, inum_faces, faces, PETSC_COPY_VALUES, leaf, PETSC_OWN_POINTER)); 293 { 294 char face_sf_name[PETSC_MAX_PATH_LEN]; 295 PetscCall(PetscSNPrintf(face_sf_name, sizeof face_sf_name, "Z-order Isoperiodic Faces #%" PetscInt_FMT, num_directions)); 296 PetscCall(PetscObjectSetName((PetscObject)face_sfs[num_directions], face_sf_name)); 297 } 298 299 transforms[num_directions][0][0] = 1; 300 transforms[num_directions][1][1] = 1; 301 transforms[num_directions][2][2] = 1; 302 transforms[num_directions][3][3] = 1; 303 transforms[num_directions][direction][3] = upper[direction] - lower[direction]; 304 num_directions++; 305 } 306 307 PetscCall(DMPlexSetIsoperiodicFaceSF(dm, num_directions, face_sfs)); 308 PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, num_directions, (PetscScalar *)transforms)); 309 310 for (PetscInt i = 0; i < num_directions; i++) PetscCall(PetscSFDestroy(&face_sfs[i])); 311 PetscFunctionReturn(PETSC_SUCCESS); 312 } 313 314 // This is a DMGlobalToLocalHook that applies the affine offsets. When extended for rotated periodicity, it'll need to 315 // apply a rotatonal transform and similar operations will be needed for fields (e.g., to rotate a velocity vector). 316 // We use this crude approach here so we don't have to write new GPU kernels yet. 317 static PetscErrorCode DMCoordAddPeriodicOffsets_Private(DM dm, Vec g, InsertMode mode, Vec l, void *ctx) 318 { 319 PetscFunctionBegin; 320 // These `VecScatter`s should be merged to improve efficiency; the scatters cannot be overlapped. 321 for (PetscInt i = 0; i < dm->periodic.num_affines; i++) { 322 PetscCall(VecScatterBegin(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD)); 323 PetscCall(VecScatterEnd(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD)); 324 } 325 PetscFunctionReturn(PETSC_SUCCESS); 326 } 327 328 // Start with an SF for a positive depth (e.g., faces) and create a new SF for matched closure. The caller must ensure 329 // that both the donor (root) face and the periodic (leaf) face have consistent orientation, meaning that their closures 330 // are isomorphic. It may be useful/necessary for this restriction to be loosened. 331 // 332 // Output Arguments: 333 // 334 // + closure_sf - augmented point SF (see `DMGetPointSF()`) that includes the faces and all points in its closure. This 335 // can be used to create a global section and section SF. 336 // - is_points - array of index sets for just the points in the closure of `face_sf`. These may be used to apply an affine 337 // transformation to periodic dofs; see DMPeriodicCoordinateSetUp_Internal(). 338 // 339 static PetscErrorCode DMPlexCreateIsoperiodicPointSF_Private(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs, PetscSF *closure_sf, IS **is_points) 340 { 341 MPI_Comm comm; 342 PetscMPIInt rank; 343 PetscSF point_sf; 344 PetscInt nroots, nleaves; 345 const PetscInt *filocal; 346 const PetscSFNode *firemote; 347 348 PetscFunctionBegin; 349 PetscCall(PetscObjectGetComm((PetscObject)dm, &comm)); 350 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 351 PetscCall(DMGetPointSF(dm, &point_sf)); // Point SF has remote points 352 PetscCall(PetscMalloc1(num_face_sfs, is_points)); 353 354 for (PetscInt f = 0; f < num_face_sfs; f++) { 355 PetscSF face_sf = face_sfs[f]; 356 PetscInt *rootdata, *leafdata; 357 358 PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, &firemote)); 359 PetscCall(PetscCalloc2(2 * nroots, &rootdata, 2 * nroots, &leafdata)); 360 for (PetscInt i = 0; i < nleaves; i++) { 361 PetscInt point = filocal[i], cl_size, *closure = NULL; 362 PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure)); 363 leafdata[point] = cl_size - 1; 364 PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure)); 365 } 366 PetscCall(PetscSFReduceBegin(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM)); 367 PetscCall(PetscSFReduceEnd(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM)); 368 369 PetscInt root_offset = 0; 370 for (PetscInt p = 0; p < nroots; p++) { 371 const PetscInt *donor_dof = rootdata + nroots; 372 if (donor_dof[p] == 0) { 373 rootdata[2 * p] = -1; 374 rootdata[2 * p + 1] = -1; 375 continue; 376 } 377 PetscInt cl_size; 378 PetscInt *closure = NULL; 379 PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure)); 380 // cl_size - 1 = points not including self 381 PetscAssert(donor_dof[p] == cl_size - 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Reduced leaf cone sizes do not match root cone sizes"); 382 rootdata[2 * p] = root_offset; 383 rootdata[2 * p + 1] = cl_size - 1; 384 root_offset += cl_size - 1; 385 PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure)); 386 } 387 PetscCall(PetscSFBcastBegin(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE)); 388 PetscCall(PetscSFBcastEnd(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE)); 389 // Count how many leaves we need to communicate the closures 390 PetscInt leaf_offset = 0; 391 for (PetscInt i = 0; i < nleaves; i++) { 392 PetscInt point = filocal[i]; 393 if (leafdata[2 * point + 1] < 0) continue; 394 leaf_offset += leafdata[2 * point + 1]; 395 } 396 397 PetscSFNode *closure_leaf; 398 PetscCall(PetscMalloc1(leaf_offset, &closure_leaf)); 399 leaf_offset = 0; 400 for (PetscInt i = 0; i < nleaves; i++) { 401 PetscInt point = filocal[i]; 402 PetscInt cl_size = leafdata[2 * point + 1]; 403 if (cl_size < 0) continue; 404 for (PetscInt j = 0; j < cl_size; j++) { 405 closure_leaf[leaf_offset].rank = firemote[i].rank; 406 closure_leaf[leaf_offset].index = leafdata[2 * point] + j; 407 leaf_offset++; 408 } 409 } 410 411 PetscSF sf_closure; 412 PetscCall(PetscSFCreate(comm, &sf_closure)); 413 PetscCall(PetscSFSetGraph(sf_closure, root_offset, leaf_offset, NULL, PETSC_USE_POINTER, closure_leaf, PETSC_OWN_POINTER)); 414 415 PetscSFNode *leaf_donor_closure; 416 { // Pack root buffer with owner for every point in the root cones 417 PetscSFNode *donor_closure; 418 const PetscInt *pilocal; 419 const PetscSFNode *piremote; 420 PetscInt npoints; 421 422 PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, &pilocal, &piremote)); 423 PetscCall(PetscCalloc1(root_offset, &donor_closure)); 424 root_offset = 0; 425 for (PetscInt p = 0; p < nroots; p++) { 426 if (rootdata[2 * p] < 0) continue; 427 PetscInt cl_size; 428 PetscInt *closure = NULL; 429 PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure)); 430 for (PetscInt j = 1; j < cl_size; j++) { 431 PetscInt c = closure[2 * j]; 432 if (pilocal) { 433 PetscInt found = -1; 434 if (npoints > 0) PetscCall(PetscFindInt(c, npoints, pilocal, &found)); 435 if (found >= 0) { 436 donor_closure[root_offset++] = piremote[found]; 437 continue; 438 } 439 } 440 // we own c 441 donor_closure[root_offset].rank = rank; 442 donor_closure[root_offset].index = c; 443 root_offset++; 444 } 445 PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure)); 446 } 447 448 PetscCall(PetscMalloc1(leaf_offset, &leaf_donor_closure)); 449 PetscCall(PetscSFBcastBegin(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE)); 450 PetscCall(PetscSFBcastEnd(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE)); 451 PetscCall(PetscSFDestroy(&sf_closure)); 452 PetscCall(PetscFree(donor_closure)); 453 } 454 455 PetscSFNode *new_iremote; 456 PetscCall(PetscCalloc1(nroots, &new_iremote)); 457 for (PetscInt i = 0; i < nroots; i++) new_iremote[i].rank = -1; 458 // Walk leaves and match vertices 459 leaf_offset = 0; 460 for (PetscInt i = 0; i < nleaves; i++) { 461 PetscInt point = filocal[i], cl_size; 462 PetscInt *closure = NULL; 463 PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure)); 464 for (PetscInt j = 1; j < cl_size; j++) { // TODO: should we send donor edge orientations so we can flip for consistency? 465 PetscInt c = closure[2 * j]; 466 PetscSFNode lc = leaf_donor_closure[leaf_offset]; 467 // printf("[%d] face %d.%d: %d ?-- (%d,%d)\n", rank, point, j, c, lc.rank, lc.index); 468 if (new_iremote[c].rank == -1) { 469 new_iremote[c] = lc; 470 } else PetscCheck(new_iremote[c].rank == lc.rank && new_iremote[c].index == lc.index, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Mismatched cone ordering between faces"); 471 leaf_offset++; 472 } 473 PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure)); 474 } 475 PetscCall(PetscFree(leaf_donor_closure)); 476 477 // Include face points in closure SF 478 for (PetscInt i = 0; i < nleaves; i++) new_iremote[filocal[i]] = firemote[i]; 479 // consolidate leaves 480 PetscInt num_new_leaves = 0; 481 for (PetscInt i = 0; i < nroots; i++) { 482 if (new_iremote[i].rank == -1) continue; 483 new_iremote[num_new_leaves] = new_iremote[i]; 484 leafdata[num_new_leaves] = i; 485 num_new_leaves++; 486 } 487 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, num_new_leaves, leafdata, PETSC_COPY_VALUES, &(*is_points)[f])); 488 489 PetscSF csf; 490 PetscCall(PetscSFCreate(comm, &csf)); 491 PetscCall(PetscSFSetGraph(csf, nroots, num_new_leaves, leafdata, PETSC_COPY_VALUES, new_iremote, PETSC_COPY_VALUES)); 492 PetscCall(PetscFree(new_iremote)); // copy and delete because new_iremote is longer than it needs to be 493 PetscCall(PetscFree2(rootdata, leafdata)); 494 495 PetscInt npoints; 496 PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, NULL, NULL)); 497 if (npoints < 0) { // empty point_sf 498 *closure_sf = csf; 499 } else { 500 PetscCall(PetscSFMerge(point_sf, csf, closure_sf)); 501 PetscCall(PetscSFDestroy(&csf)); 502 } 503 if (f > 0) PetscCall(PetscSFDestroy(&point_sf)); // Only destroy if point_sf is from previous calls to PetscSFMerge 504 point_sf = *closure_sf; // Use combined point + isoperiodic SF to define point ownership for further face_sf 505 } 506 PetscCall(PetscObjectSetName((PetscObject)*closure_sf, "Composed Periodic Points")); 507 PetscFunctionReturn(PETSC_SUCCESS); 508 } 509 510 static PetscErrorCode DMGetIsoperiodicPointSF_Plex(DM dm, PetscSF *sf) 511 { 512 DM_Plex *plex = (DM_Plex *)dm->data; 513 514 PetscFunctionBegin; 515 if (!plex->periodic.composed_sf) PetscCall(DMPlexCreateIsoperiodicPointSF_Private(dm, plex->periodic.num_face_sfs, plex->periodic.face_sfs, &plex->periodic.composed_sf, &plex->periodic.periodic_points)); 516 if (sf) *sf = plex->periodic.composed_sf; 517 PetscFunctionReturn(PETSC_SUCCESS); 518 } 519 520 PetscErrorCode DMPlexMigrateIsoperiodicFaceSF_Internal(DM old_dm, DM dm, PetscSF sf_migration) 521 { 522 DM_Plex *plex = (DM_Plex *)old_dm->data; 523 PetscSF sf_point, *new_face_sfs; 524 PetscMPIInt rank; 525 526 PetscFunctionBegin; 527 if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS); 528 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank)); 529 PetscCall(DMGetPointSF(dm, &sf_point)); 530 PetscCall(PetscMalloc1(plex->periodic.num_face_sfs, &new_face_sfs)); 531 532 for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) { 533 PetscInt old_npoints, new_npoints, old_nleaf, new_nleaf, point_nleaf; 534 PetscSFNode *new_leafdata, *rootdata, *leafdata; 535 const PetscInt *old_local, *point_local; 536 const PetscSFNode *old_remote, *point_remote; 537 538 PetscCall(PetscSFGetGraph(plex->periodic.face_sfs[f], &old_npoints, &old_nleaf, &old_local, &old_remote)); 539 PetscCall(PetscSFGetGraph(sf_migration, NULL, &new_nleaf, NULL, NULL)); 540 PetscCall(PetscSFGetGraph(sf_point, &new_npoints, &point_nleaf, &point_local, &point_remote)); 541 PetscAssert(new_nleaf == new_npoints, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected migration leaf space to match new point root space"); 542 PetscCall(PetscMalloc3(old_npoints, &rootdata, old_npoints, &leafdata, new_npoints, &new_leafdata)); 543 544 // Fill new_leafdata with new owners of all points 545 for (PetscInt i = 0; i < new_npoints; i++) { 546 new_leafdata[i].rank = rank; 547 new_leafdata[i].index = i; 548 } 549 for (PetscInt i = 0; i < point_nleaf; i++) { 550 PetscInt j = point_local[i]; 551 new_leafdata[j] = point_remote[i]; 552 } 553 // REPLACE is okay because every leaf agrees about the new owners 554 PetscCall(PetscSFReduceBegin(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE)); 555 PetscCall(PetscSFReduceEnd(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE)); 556 // rootdata now contains the new owners 557 558 // Send to leaves of old space 559 for (PetscInt i = 0; i < old_npoints; i++) { 560 leafdata[i].rank = -1; 561 leafdata[i].index = -1; 562 } 563 PetscCall(PetscSFBcastBegin(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE)); 564 PetscCall(PetscSFBcastEnd(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE)); 565 566 // Send to new leaf space 567 PetscCall(PetscSFBcastBegin(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE)); 568 PetscCall(PetscSFBcastEnd(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE)); 569 570 PetscInt nface = 0, *new_local; 571 PetscSFNode *new_remote; 572 for (PetscInt i = 0; i < new_npoints; i++) nface += (new_leafdata[i].rank >= 0); 573 PetscCall(PetscMalloc1(nface, &new_local)); 574 PetscCall(PetscMalloc1(nface, &new_remote)); 575 nface = 0; 576 for (PetscInt i = 0; i < new_npoints; i++) { 577 if (new_leafdata[i].rank == -1) continue; 578 new_local[nface] = i; 579 new_remote[nface] = new_leafdata[i]; 580 nface++; 581 } 582 PetscCall(PetscFree3(rootdata, leafdata, new_leafdata)); 583 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &new_face_sfs[f])); 584 PetscCall(PetscSFSetGraph(new_face_sfs[f], new_npoints, nface, new_local, PETSC_OWN_POINTER, new_remote, PETSC_OWN_POINTER)); 585 { 586 char new_face_sf_name[PETSC_MAX_PATH_LEN]; 587 PetscCall(PetscSNPrintf(new_face_sf_name, sizeof new_face_sf_name, "Migrated Isoperiodic Faces #%" PetscInt_FMT, f)); 588 PetscCall(PetscObjectSetName((PetscObject)new_face_sfs[f], new_face_sf_name)); 589 } 590 } 591 592 PetscCall(DMPlexSetIsoperiodicFaceSF(dm, plex->periodic.num_face_sfs, new_face_sfs)); 593 PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, plex->periodic.num_face_sfs, (PetscScalar *)plex->periodic.transform)); 594 for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) PetscCall(PetscSFDestroy(&new_face_sfs[f])); 595 PetscCall(PetscFree(new_face_sfs)); 596 PetscFunctionReturn(PETSC_SUCCESS); 597 } 598 599 PetscErrorCode DMPeriodicCoordinateSetUp_Internal(DM dm) 600 { 601 DM_Plex *plex = (DM_Plex *)dm->data; 602 PetscCount count; 603 IS isdof; 604 PetscInt dim; 605 606 PetscFunctionBegin; 607 if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS); 608 PetscCall(DMGetIsoperiodicPointSF_Plex(dm, NULL)); 609 PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex)); 610 611 PetscCall(DMGetDimension(dm, &dim)); 612 dm->periodic.num_affines = plex->periodic.num_face_sfs; 613 PetscCall(PetscMalloc2(dm->periodic.num_affines, &dm->periodic.affine_to_local, dm->periodic.num_affines, &dm->periodic.affine)); 614 615 for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) { 616 PetscInt npoints, vsize, isize; 617 const PetscInt *points; 618 IS is = plex->periodic.periodic_points[f]; 619 PetscSegBuffer seg; 620 PetscSection section; 621 PetscInt *ind; 622 Vec L, P; 623 VecType vec_type; 624 VecScatter scatter; 625 PetscScalar *x; 626 627 PetscCall(DMGetLocalSection(dm, §ion)); 628 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 32, &seg)); 629 PetscCall(ISGetSize(is, &npoints)); 630 PetscCall(ISGetIndices(is, &points)); 631 for (PetscInt i = 0; i < npoints; i++) { 632 PetscInt point = points[i], off, dof; 633 634 PetscCall(PetscSectionGetOffset(section, point, &off)); 635 PetscCall(PetscSectionGetDof(section, point, &dof)); 636 PetscAssert(dof % dim == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected dof %" PetscInt_FMT " not divisible by dimension %" PetscInt_FMT, dof, dim); 637 for (PetscInt j = 0; j < dof / dim; j++) { 638 PetscInt *slot; 639 640 PetscCall(PetscSegBufferGetInts(seg, 1, &slot)); 641 *slot = off / dim + j; 642 } 643 } 644 PetscCall(PetscSegBufferGetSize(seg, &count)); 645 PetscCall(PetscSegBufferExtractAlloc(seg, &ind)); 646 PetscCall(PetscSegBufferDestroy(&seg)); 647 PetscCall(PetscIntCast(count, &isize)); 648 PetscCall(ISCreateBlock(PETSC_COMM_SELF, dim, isize, ind, PETSC_OWN_POINTER, &isdof)); 649 650 PetscCall(PetscIntCast(count * dim, &vsize)); 651 PetscCall(DMGetLocalVector(dm, &L)); 652 PetscCall(VecCreate(PETSC_COMM_SELF, &P)); 653 PetscCall(VecSetSizes(P, vsize, vsize)); 654 PetscCall(VecGetType(L, &vec_type)); 655 PetscCall(VecSetType(P, vec_type)); 656 PetscCall(VecScatterCreate(P, NULL, L, isdof, &scatter)); 657 PetscCall(DMRestoreLocalVector(dm, &L)); 658 PetscCall(ISDestroy(&isdof)); 659 660 PetscCall(VecGetArrayWrite(P, &x)); 661 for (PetscCount i = 0; i < count; i++) { 662 for (PetscInt j = 0; j < dim; j++) x[i * dim + j] = plex->periodic.transform[f][j][3]; 663 } 664 PetscCall(VecRestoreArrayWrite(P, &x)); 665 666 dm->periodic.affine_to_local[f] = scatter; 667 dm->periodic.affine[f] = P; 668 } 669 PetscCall(DMGlobalToLocalHookAdd(dm, NULL, DMCoordAddPeriodicOffsets_Private, NULL)); 670 PetscFunctionReturn(PETSC_SUCCESS); 671 } 672 673 // We'll just orient all the edges, though only periodic boundary edges need orientation 674 static PetscErrorCode DMPlexOrientPositiveEdges_Private(DM dm) 675 { 676 PetscInt dim, eStart, eEnd; 677 678 PetscFunctionBegin; 679 PetscCall(DMGetDimension(dm, &dim)); 680 if (dim < 3) PetscFunctionReturn(PETSC_SUCCESS); // not necessary 681 PetscCall(DMPlexGetDepthStratum(dm, 1, &eStart, &eEnd)); 682 for (PetscInt e = eStart; e < eEnd; e++) { 683 const PetscInt *cone; 684 PetscCall(DMPlexGetCone(dm, e, &cone)); 685 if (cone[0] > cone[1]) PetscCall(DMPlexOrientPoint(dm, e, -1)); 686 } 687 PetscFunctionReturn(PETSC_SUCCESS); 688 } 689 690 PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Internal(DM dm, PetscInt dim, const PetscInt faces[], const PetscReal lower[], const PetscReal upper[], const DMBoundaryType periodicity[], PetscBool interpolate) 691 { 692 PetscInt eextent[3] = {1, 1, 1}, vextent[3] = {1, 1, 1}; 693 const Ijk closure_1[] = { 694 {0, 0, 0}, 695 {1, 0, 0}, 696 }; 697 const Ijk closure_2[] = { 698 {0, 0, 0}, 699 {1, 0, 0}, 700 {1, 1, 0}, 701 {0, 1, 0}, 702 }; 703 const Ijk closure_3[] = { 704 {0, 0, 0}, 705 {0, 1, 0}, 706 {1, 1, 0}, 707 {1, 0, 0}, 708 {0, 0, 1}, 709 {1, 0, 1}, 710 {1, 1, 1}, 711 {0, 1, 1}, 712 }; 713 const Ijk *const closure_dim[] = {NULL, closure_1, closure_2, closure_3}; 714 // This must be kept consistent with DMPlexCreateCubeMesh_Internal 715 const PetscInt face_marker_1[] = {1, 2}; 716 const PetscInt face_marker_2[] = {4, 2, 1, 3}; 717 const PetscInt face_marker_3[] = {6, 5, 3, 4, 1, 2}; 718 const PetscInt *const face_marker_dim[] = {NULL, face_marker_1, face_marker_2, face_marker_3}; 719 // Orient faces so the normal is in the positive axis and the first vertex is the one closest to zero. 720 // These orientations can be determined by examining cones of a reference quad and hex element. 721 const PetscInt face_orient_1[] = {0, 0}; 722 const PetscInt face_orient_2[] = {-1, 0, 0, -1}; 723 const PetscInt face_orient_3[] = {-2, 0, -2, 1, -2, 0}; 724 const PetscInt *const face_orient_dim[] = {NULL, face_orient_1, face_orient_2, face_orient_3}; 725 726 PetscFunctionBegin; 727 PetscCall(PetscLogEventBegin(DMPLEX_CreateBoxSFC, dm, 0, 0, 0)); 728 PetscAssertPointer(dm, 1); 729 PetscValidLogicalCollectiveInt(dm, dim, 2); 730 PetscCall(DMSetDimension(dm, dim)); 731 PetscMPIInt rank, size; 732 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size)); 733 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank)); 734 for (PetscInt i = 0; i < dim; i++) { 735 eextent[i] = faces[i]; 736 vextent[i] = faces[i] + 1; 737 } 738 ZLayout layout; 739 PetscCall(ZLayoutCreate(size, eextent, vextent, &layout)); 740 PetscZSet vset; // set of all vertices in the closure of the owned elements 741 PetscCall(PetscZSetCreate(&vset)); 742 PetscInt local_elems = 0; 743 for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) { 744 Ijk loc = ZCodeSplit(z); 745 if (IjkActive(layout.vextent, loc)) PetscCall(PetscZSetAdd(vset, z)); 746 else { 747 z += ZStepOct(z); 748 continue; 749 } 750 if (IjkActive(layout.eextent, loc)) { 751 local_elems++; 752 // Add all neighboring vertices to set 753 for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) { 754 Ijk inc = closure_dim[dim][n]; 755 Ijk nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k}; 756 ZCode v = ZEncode(nloc); 757 PetscCall(PetscZSetAdd(vset, v)); 758 } 759 } 760 } 761 PetscInt local_verts, off = 0; 762 ZCode *vert_z; 763 PetscCall(PetscZSetGetSize(vset, &local_verts)); 764 PetscCall(PetscMalloc1(local_verts, &vert_z)); 765 PetscCall(PetscZSetGetElems(vset, &off, vert_z)); 766 PetscCall(PetscZSetDestroy(&vset)); 767 // ZCode is unsigned for bitwise convenience, but highest bit should never be set, so can interpret as signed 768 PetscCall(PetscSortInt64(local_verts, (PetscInt64 *)vert_z)); 769 770 PetscCall(DMPlexSetChart(dm, 0, local_elems + local_verts)); 771 for (PetscInt e = 0; e < local_elems; e++) PetscCall(DMPlexSetConeSize(dm, e, PetscPowInt(2, dim))); 772 PetscCall(DMSetUp(dm)); 773 { 774 PetscInt e = 0; 775 for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) { 776 Ijk loc = ZCodeSplit(z); 777 if (!IjkActive(layout.eextent, loc)) { 778 z += ZStepOct(z); 779 continue; 780 } 781 PetscInt cone[8], orient[8] = {0}; 782 for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) { 783 Ijk inc = closure_dim[dim][n]; 784 Ijk nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k}; 785 ZCode v = ZEncode(nloc); 786 PetscInt ci = ZCodeFind(v, local_verts, vert_z); 787 PetscAssert(ci >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Did not find neighbor vertex in set"); 788 cone[n] = local_elems + ci; 789 } 790 PetscCall(DMPlexSetCone(dm, e, cone)); 791 PetscCall(DMPlexSetConeOrientation(dm, e, orient)); 792 e++; 793 } 794 } 795 796 PetscCall(DMPlexSymmetrize(dm)); 797 PetscCall(DMPlexStratify(dm)); 798 799 { // Create point SF 800 PetscSF sf; 801 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &sf)); 802 PetscInt owned_verts = ZCodeFind(layout.zstarts[rank + 1], local_verts, vert_z); 803 if (owned_verts < 0) owned_verts = -(owned_verts + 1); // We don't care whether the key was found 804 PetscInt num_ghosts = local_verts - owned_verts; // Due to sorting, owned vertices always come first 805 PetscInt *local_ghosts; 806 PetscSFNode *ghosts; 807 PetscCall(PetscMalloc1(num_ghosts, &local_ghosts)); 808 PetscCall(PetscMalloc1(num_ghosts, &ghosts)); 809 for (PetscInt i = 0; i < num_ghosts;) { 810 ZCode z = vert_z[owned_verts + i]; 811 PetscMPIInt remote_rank = (PetscMPIInt)ZCodeFind(z, size + 1, layout.zstarts), remote_count = 0; 812 if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1; 813 // We have a new remote rank; find all the ghost indices (which are contiguous in vert_z) 814 815 // Count the elements on remote_rank 816 PetscInt remote_elem = ZLayoutElementsOnRank(&layout, remote_rank); 817 818 // Traverse vertices and make ghost links 819 for (ZCode rz = layout.zstarts[remote_rank]; rz < layout.zstarts[remote_rank + 1]; rz++) { 820 Ijk loc = ZCodeSplit(rz); 821 if (rz == z) { 822 local_ghosts[i] = local_elems + owned_verts + i; 823 ghosts[i].rank = remote_rank; 824 ghosts[i].index = remote_elem + remote_count; 825 i++; 826 if (i == num_ghosts) break; 827 z = vert_z[owned_verts + i]; 828 } 829 if (IjkActive(layout.vextent, loc)) remote_count++; 830 else rz += ZStepOct(rz); 831 } 832 } 833 PetscCall(PetscSFSetGraph(sf, local_elems + local_verts, num_ghosts, local_ghosts, PETSC_OWN_POINTER, ghosts, PETSC_OWN_POINTER)); 834 PetscCall(PetscObjectSetName((PetscObject)sf, "SFC Point SF")); 835 PetscCall(DMSetPointSF(dm, sf)); 836 PetscCall(PetscSFDestroy(&sf)); 837 } 838 { 839 Vec coordinates; 840 PetscScalar *coords; 841 PetscSection coord_section; 842 PetscInt coord_size; 843 PetscCall(DMGetCoordinateSection(dm, &coord_section)); 844 PetscCall(PetscSectionSetNumFields(coord_section, 1)); 845 PetscCall(PetscSectionSetFieldComponents(coord_section, 0, dim)); 846 PetscCall(PetscSectionSetChart(coord_section, local_elems, local_elems + local_verts)); 847 for (PetscInt v = 0; v < local_verts; v++) { 848 PetscInt point = local_elems + v; 849 PetscCall(PetscSectionSetDof(coord_section, point, dim)); 850 PetscCall(PetscSectionSetFieldDof(coord_section, point, 0, dim)); 851 } 852 PetscCall(PetscSectionSetUp(coord_section)); 853 PetscCall(PetscSectionGetStorageSize(coord_section, &coord_size)); 854 PetscCall(VecCreate(PETSC_COMM_SELF, &coordinates)); 855 PetscCall(PetscObjectSetName((PetscObject)coordinates, "coordinates")); 856 PetscCall(VecSetSizes(coordinates, coord_size, PETSC_DETERMINE)); 857 PetscCall(VecSetBlockSize(coordinates, dim)); 858 PetscCall(VecSetType(coordinates, VECSTANDARD)); 859 PetscCall(VecGetArray(coordinates, &coords)); 860 for (PetscInt v = 0; v < local_verts; v++) { 861 Ijk loc = ZCodeSplit(vert_z[v]); 862 coords[v * dim + 0] = lower[0] + loc.i * (upper[0] - lower[0]) / layout.eextent.i; 863 if (dim > 1) coords[v * dim + 1] = lower[1] + loc.j * (upper[1] - lower[1]) / layout.eextent.j; 864 if (dim > 2) coords[v * dim + 2] = lower[2] + loc.k * (upper[2] - lower[2]) / layout.eextent.k; 865 } 866 PetscCall(VecRestoreArray(coordinates, &coords)); 867 PetscCall(DMSetCoordinatesLocal(dm, coordinates)); 868 PetscCall(VecDestroy(&coordinates)); 869 } 870 if (interpolate) { 871 PetscCall(DMPlexInterpolateInPlace_Internal(dm)); 872 // It's currently necessary to orient the donor and periodic edges consistently. An easy way to ensure that is ot 873 // give all edges positive orientation. Since vertices are created in Z-order, all ranks will agree about the 874 // ordering cone[0] < cone[1]. This is overkill and it would be nice to remove this preparation and make 875 // DMPlexCreateIsoperiodicClosureSF_Private() more resilient, so it fixes any inconsistent orientations. That might 876 // be needed in a general CGNS reader, for example. 877 PetscCall(DMPlexOrientPositiveEdges_Private(dm)); 878 879 DMLabel label; 880 PetscCall(DMCreateLabel(dm, "Face Sets")); 881 PetscCall(DMGetLabel(dm, "Face Sets", &label)); 882 PetscSegBuffer per_faces[3], donor_face_closure[3], my_donor_faces[3]; 883 for (PetscInt i = 0; i < 3; i++) { 884 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &per_faces[i])); 885 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &my_donor_faces[i])); 886 PetscCall(PetscSegBufferCreate(sizeof(ZCode), 64 * PetscPowInt(2, dim), &donor_face_closure[i])); 887 } 888 PetscInt fStart, fEnd, vStart, vEnd; 889 PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd)); 890 PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd)); 891 for (PetscInt f = fStart; f < fEnd; f++) { 892 PetscInt npoints, *points = NULL, num_fverts = 0, fverts[8]; 893 PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points)); 894 PetscInt bc_count[6] = {0}; 895 for (PetscInt i = 0; i < npoints; i++) { 896 PetscInt p = points[2 * i]; 897 if (p < vStart || vEnd <= p) continue; 898 fverts[num_fverts++] = p; 899 Ijk loc = ZCodeSplit(vert_z[p - vStart]); 900 // Convention here matches DMPlexCreateCubeMesh_Internal 901 bc_count[0] += loc.i == 0; 902 bc_count[1] += loc.i == layout.vextent.i - 1; 903 bc_count[2] += loc.j == 0; 904 bc_count[3] += loc.j == layout.vextent.j - 1; 905 bc_count[4] += loc.k == 0; 906 bc_count[5] += loc.k == layout.vextent.k - 1; 907 } 908 PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points)); 909 for (PetscInt bc = 0, bc_match = 0; bc < 2 * dim; bc++) { 910 if (bc_count[bc] == PetscPowInt(2, dim - 1)) { 911 PetscCall(DMPlexOrientPoint(dm, f, face_orient_dim[dim][bc])); 912 if (periodicity[bc / 2] == DM_BOUNDARY_PERIODIC) { 913 PetscInt *put; 914 if (bc % 2 == 0) { // donor face; no label 915 PetscCall(PetscSegBufferGet(my_donor_faces[bc / 2], 1, &put)); 916 *put = f; 917 } else { // periodic face 918 PetscCall(PetscSegBufferGet(per_faces[bc / 2], 1, &put)); 919 *put = f; 920 ZCode *zput; 921 PetscCall(PetscSegBufferGet(donor_face_closure[bc / 2], num_fverts, &zput)); 922 for (PetscInt i = 0; i < num_fverts; i++) { 923 Ijk loc = ZCodeSplit(vert_z[fverts[i] - vStart]); 924 switch (bc / 2) { 925 case 0: 926 loc.i = 0; 927 break; 928 case 1: 929 loc.j = 0; 930 break; 931 case 2: 932 loc.k = 0; 933 break; 934 } 935 *zput++ = ZEncode(loc); 936 } 937 } 938 continue; 939 } 940 PetscAssert(bc_match == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Face matches multiple face sets"); 941 PetscCall(DMLabelSetValue(label, f, face_marker_dim[dim][bc])); 942 bc_match++; 943 } 944 } 945 } 946 // Ensure that the Coordinate DM has our new boundary labels 947 DM cdm; 948 PetscCall(DMGetCoordinateDM(dm, &cdm)); 949 PetscCall(DMCopyLabels(dm, cdm, PETSC_COPY_VALUES, PETSC_FALSE, DM_COPY_LABELS_FAIL)); 950 if (periodicity[0] == DM_BOUNDARY_PERIODIC || (dim > 1 && periodicity[1] == DM_BOUNDARY_PERIODIC) || (dim > 2 && periodicity[2] == DM_BOUNDARY_PERIODIC)) { 951 PetscCall(DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(dm, &layout, vert_z, per_faces, lower, upper, periodicity, donor_face_closure, my_donor_faces)); 952 } 953 for (PetscInt i = 0; i < 3; i++) { 954 PetscCall(PetscSegBufferDestroy(&per_faces[i])); 955 PetscCall(PetscSegBufferDestroy(&donor_face_closure[i])); 956 PetscCall(PetscSegBufferDestroy(&my_donor_faces[i])); 957 } 958 } 959 PetscCall(PetscFree(layout.zstarts)); 960 PetscCall(PetscFree(vert_z)); 961 PetscCall(PetscLogEventEnd(DMPLEX_CreateBoxSFC, dm, 0, 0, 0)); 962 PetscFunctionReturn(PETSC_SUCCESS); 963 } 964 965 /*@ 966 DMPlexSetIsoperiodicFaceSF - Express periodicity from an existing mesh 967 968 Logically Collective 969 970 Input Parameters: 971 + dm - The `DMPLEX` on which to set periodicity 972 . num_face_sfs - Number of `PetscSF`s in `face_sfs` 973 - face_sfs - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face. 974 975 Level: advanced 976 977 Note: 978 One can use `-dm_plex_shape zbox` to use this mode of periodicity, wherein the periodic points are distinct both globally 979 and locally, but are paired when creating a global dof space. 980 981 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexGetIsoperiodicFaceSF()` 982 @*/ 983 PetscErrorCode DMPlexSetIsoperiodicFaceSF(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs) 984 { 985 DM_Plex *plex = (DM_Plex *)dm->data; 986 987 PetscFunctionBegin; 988 PetscValidHeaderSpecific(dm, DM_CLASSID, 1); 989 if (face_sfs) PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex)); 990 if (face_sfs == plex->periodic.face_sfs && num_face_sfs == plex->periodic.num_face_sfs) PetscFunctionReturn(PETSC_SUCCESS); 991 992 for (PetscInt i = 0; i < num_face_sfs; i++) PetscCall(PetscObjectReference((PetscObject)face_sfs[i])); 993 994 if (plex->periodic.num_face_sfs > 0) { 995 for (PetscInt i = 0; i < plex->periodic.num_face_sfs; i++) PetscCall(PetscSFDestroy(&plex->periodic.face_sfs[i])); 996 PetscCall(PetscFree(plex->periodic.face_sfs)); 997 } 998 999 plex->periodic.num_face_sfs = num_face_sfs; 1000 PetscCall(PetscCalloc1(num_face_sfs, &plex->periodic.face_sfs)); 1001 for (PetscInt i = 0; i < num_face_sfs; i++) plex->periodic.face_sfs[i] = face_sfs[i]; 1002 1003 DM cdm = dm->coordinates[0].dm; // Can't DMGetCoordinateDM because it automatically creates one 1004 if (cdm) { 1005 PetscCall(DMPlexSetIsoperiodicFaceSF(cdm, num_face_sfs, face_sfs)); 1006 if (face_sfs) cdm->periodic.setup = DMPeriodicCoordinateSetUp_Internal; 1007 } 1008 PetscFunctionReturn(PETSC_SUCCESS); 1009 } 1010 1011 /*@C 1012 DMPlexGetIsoperiodicFaceSF - Obtain periodicity for a mesh 1013 1014 Logically Collective 1015 1016 Input Parameter: 1017 . dm - The `DMPLEX` for which to obtain periodic relation 1018 1019 Output Parameters: 1020 + num_face_sfs - Number of `PetscSF`s in the array 1021 - face_sfs - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face. 1022 1023 Level: advanced 1024 1025 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()` 1026 @*/ 1027 PetscErrorCode DMPlexGetIsoperiodicFaceSF(DM dm, PetscInt *num_face_sfs, const PetscSF **face_sfs) 1028 { 1029 DM_Plex *plex = (DM_Plex *)dm->data; 1030 1031 PetscFunctionBegin; 1032 PetscValidHeaderSpecific(dm, DM_CLASSID, 1); 1033 *face_sfs = plex->periodic.face_sfs; 1034 *num_face_sfs = plex->periodic.num_face_sfs; 1035 PetscFunctionReturn(PETSC_SUCCESS); 1036 } 1037 1038 /*@C 1039 DMPlexSetIsoperiodicFaceTransform - set geometric transform from donor to periodic points 1040 1041 Logically Collective 1042 1043 Input Parameters: 1044 + dm - `DMPLEX` that has been configured with `DMPlexSetIsoperiodicFaceSF()` 1045 . n - Number of transforms in array 1046 - t - Array of 4x4 affine transformation basis. 1047 1048 Level: advanced 1049 1050 Notes: 1051 Affine transforms are 4x4 matrices in which the leading 3x3 block expresses a rotation (or identity for no rotation), 1052 the last column contains a translation vector, and the bottom row is all zero except the last entry, which must always 1053 be 1. This representation is common in geometric modeling and allows affine transformations to be composed using 1054 simple matrix multiplication. 1055 1056 Although the interface accepts a general affine transform, only affine translation is supported at present. 1057 1058 Developer Notes: 1059 This interface should be replaced by making BasisTransform public, expanding it to support affine representations, and 1060 adding GPU implementations to apply the G2L/L2G transforms. 1061 1062 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()` 1063 @*/ 1064 PetscErrorCode DMPlexSetIsoperiodicFaceTransform(DM dm, PetscInt n, const PetscScalar t[]) 1065 { 1066 DM_Plex *plex = (DM_Plex *)dm->data; 1067 1068 PetscFunctionBegin; 1069 PetscValidHeaderSpecific(dm, DM_CLASSID, 1); 1070 PetscCheck(n == plex->periodic.num_face_sfs, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Number of transforms (%" PetscInt_FMT ") must equal number of isoperiodc face SFs (%" PetscInt_FMT ")", n, plex->periodic.num_face_sfs); 1071 1072 PetscCall(PetscMalloc1(n, &plex->periodic.transform)); 1073 for (PetscInt i = 0; i < n; i++) { 1074 for (PetscInt j = 0; j < 4; j++) { 1075 for (PetscInt k = 0; k < 4; k++) { 1076 PetscCheck(j != k || t[i * 16 + j * 4 + k] == 1., PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Rotated transforms not supported"); 1077 plex->periodic.transform[i][j][k] = t[i * 16 + j * 4 + k]; 1078 } 1079 } 1080 } 1081 PetscFunctionReturn(PETSC_SUCCESS); 1082 } 1083