1 #include <petsc/private/dmpleximpl.h> /*I "petscdmplex.h" I*/ 2 #include <petscsf.h> 3 #include <petsc/private/hashset.h> 4 5 typedef uint64_t ZCode; 6 7 PETSC_HASH_SET(ZSet, ZCode, PetscHash_UInt64, PetscHashEqual) 8 9 typedef struct { 10 PetscInt i, j, k; 11 } Ijk; 12 13 typedef struct { 14 Ijk eextent; 15 Ijk vextent; 16 PetscMPIInt comm_size; 17 ZCode *zstarts; 18 } ZLayout; 19 20 // ***** Overview of ZCode ******* 21 // The SFC uses integer indexing for each dimension and encodes them into a single integer by interleaving the bits of each index. 22 // This is known as Morton encoding, and is refered to as ZCode in this code. 23 // So for index i having bits [i2,i1,i0], and similar for indexes j and k, the ZCode (Morton number) would be: 24 // [k2,j2,i2,k1,j1,i1,k0,j0,i0] 25 // This encoding allows for easier traversal of the SFC structure (see https://en.wikipedia.org/wiki/Z-order_curve and `ZStepOct()`). 26 // `ZEncode()` is used to go from indices to ZCode, while `ZCodeSplit()` goes from ZCode back to indices. 27 28 // Decodes the leading interleaved index from a ZCode 29 // e.g. [k2,j2,i2,k1,j1,i1,k0,j0,i0] -> [i2,i1,i0] 30 // Magic numbers taken from https://stackoverflow.com/a/18528775/7564988 31 static unsigned ZCodeSplit1(ZCode z) 32 { 33 z &= 0x1249249249249249; 34 z = (z | z >> 2) & 0x10c30c30c30c30c3; 35 z = (z | z >> 4) & 0x100f00f00f00f00f; 36 z = (z | z >> 8) & 0x1f0000ff0000ff; 37 z = (z | z >> 16) & 0x1f00000000ffff; 38 z = (z | z >> 32) & 0x1fffff; 39 return (unsigned)z; 40 } 41 42 // Encodes the leading interleaved index from a ZCode 43 // e.g. [i2,i1,i0] -> [0,0,i2,0,0,i1,0,0,i0] 44 static ZCode ZEncode1(unsigned t) 45 { 46 ZCode z = t; 47 z &= 0x1fffff; 48 z = (z | z << 32) & 0x1f00000000ffff; 49 z = (z | z << 16) & 0x1f0000ff0000ff; 50 z = (z | z << 8) & 0x100f00f00f00f00f; 51 z = (z | z << 4) & 0x10c30c30c30c30c3; 52 z = (z | z << 2) & 0x1249249249249249; 53 return z; 54 } 55 56 // Decodes i j k indices from a ZCode. 57 // Uses `ZCodeSplit1()` by shifting ZCode so that the leading index is the desired one to decode 58 static Ijk ZCodeSplit(ZCode z) 59 { 60 Ijk c; 61 c.i = ZCodeSplit1(z >> 2); 62 c.j = ZCodeSplit1(z >> 1); 63 c.k = ZCodeSplit1(z >> 0); 64 return c; 65 } 66 67 // Encodes i j k indices to a ZCode. 68 // Uses `ZCodeEncode1()` by shifting resulting ZCode to the appropriate bit placement 69 static ZCode ZEncode(Ijk c) 70 { 71 ZCode z = (ZEncode1((unsigned int)c.i) << 2) | (ZEncode1((unsigned int)c.j) << 1) | ZEncode1((unsigned int)c.k); 72 return z; 73 } 74 75 static PetscBool IjkActive(Ijk extent, Ijk l) 76 { 77 if (l.i < extent.i && l.j < extent.j && l.k < extent.k) return PETSC_TRUE; 78 return PETSC_FALSE; 79 } 80 81 // If z is not the base of an octet (last 3 bits 0), return 0. 82 // 83 // If z is the base of an octet, we recursively grow to the biggest structured octet. This is typically useful when a z 84 // is outside the domain and we wish to skip a (possibly recursively large) octet to find our next interesting point. 85 static ZCode ZStepOct(ZCode z) 86 { 87 if (PetscUnlikely(z == 0)) return 0; // Infinite loop below if z == 0 88 ZCode step = 07; 89 for (; (z & step) == 0; step = (step << 3) | 07) { } 90 return step >> 3; 91 } 92 93 // Since element/vertex box extents are typically not equal powers of 2, Z codes that lie within the domain are not contiguous. 94 static PetscErrorCode ZLayoutCreate(PetscMPIInt size, const PetscInt eextent[3], const PetscInt vextent[3], ZLayout *layout) 95 { 96 PetscFunctionBegin; 97 layout->eextent.i = eextent[0]; 98 layout->eextent.j = eextent[1]; 99 layout->eextent.k = eextent[2]; 100 layout->vextent.i = vextent[0]; 101 layout->vextent.j = vextent[1]; 102 layout->vextent.k = vextent[2]; 103 layout->comm_size = size; 104 layout->zstarts = NULL; 105 PetscCall(PetscMalloc1(size + 1, &layout->zstarts)); 106 107 PetscInt total_elems = eextent[0] * eextent[1] * eextent[2]; 108 ZCode z = 0; 109 layout->zstarts[0] = 0; 110 // This loop traverses all vertices in the global domain, so is worth making fast. We use ZStepBound 111 for (PetscMPIInt r = 0; r < size; r++) { 112 PetscInt elems_needed = (total_elems / size) + (total_elems % size > r), count; 113 for (count = 0; count < elems_needed; z++) { 114 ZCode skip = ZStepOct(z); // optimistically attempt a longer step 115 for (ZCode s = skip;; s >>= 3) { 116 Ijk trial = ZCodeSplit(z + s); 117 if (IjkActive(layout->eextent, trial)) { 118 while (count + s + 1 > (ZCode)elems_needed) s >>= 3; // Shrink the octet 119 count += s + 1; 120 z += s; 121 break; 122 } 123 if (s == 0) { // the whole skip octet is inactive 124 z += skip; 125 break; 126 } 127 } 128 } 129 // Pick up any extra vertices in the Z ordering before the next rank's first owned element. 130 // 131 // This leads to poorly balanced vertices when eextent is a power of 2, since all the fringe vertices end up 132 // on the last rank. A possible solution is to balance the Z-order vertices independently from the cells, which will 133 // result in a lot of element closures being remote. We could finish marking boundary conditions, then do a round of 134 // vertex ownership smoothing (which would reorder and redistribute vertices without touching element distribution). 135 // Another would be to have an analytic ownership criteria for vertices in the fringe veextent - eextent. This would 136 // complicate the job of identifying an owner and its offset. 137 // 138 // The current recommended approach is to let `-dm_distribute 1` (default) resolve vertex ownership. This is 139 // *mandatory* with isoperiodicity (except in special cases) to remove standed vertices from local spaces. Here's 140 // the issue: 141 // 142 // Consider this partition on rank 0 (left) and rank 1. 143 // 144 // 4 -------- 5 -- 14 --10 -- 21 --11 145 // | | | 146 // 7 -- 16 -- 8 | | | 147 // | | 3 ------- 7 ------- 9 148 // | | | | 149 // 4 -------- 6 ------ 10 | | 150 // | | | 6 -- 16 -- 8 151 // | | | 152 // 3 ---11--- 5 --18-- 9 153 // 154 // The periodic face SF looks like 155 // [0] Number of roots=21, leaves=1, remote ranks=1 156 // [0] 16 <- (0,11) 157 // [1] Number of roots=22, leaves=2, remote ranks=2 158 // [1] 14 <- (0,18) 159 // [1] 21 <- (1,16) 160 // 161 // In handling face (0,16), rank 0 learns that (0,7) and (0,8) map to (0,3) and (0,5) respectively, thus we won't use 162 // the point SF links to (1,4) and (1,5). Rank 1 learns about the periodic mapping of (1,5) while handling face 163 // (1,14), but never learns that vertex (1,4) has been mapped to (0,3) by face (0,16). 164 // 165 // We can relatively easily inform vertex (1,4) of this mapping, but it stays in rank 1's local space despite not 166 // being in the closure and thus not being contributed to. This would be mostly harmless except that some viewer 167 // routines expect all local points to be somehow significant. It is not easy to analytically remove the (1,4) 168 // vertex because the point SF and isoperiodic face SF would need to be updated to account for removal of the 169 // stranded vertices. 170 for (; z <= ZEncode(layout->vextent); z++) { 171 Ijk loc = ZCodeSplit(z); 172 if (IjkActive(layout->eextent, loc)) break; 173 z += ZStepOct(z); 174 } 175 layout->zstarts[r + 1] = z; 176 } 177 layout->zstarts[size] = ZEncode(layout->vextent); 178 PetscFunctionReturn(PETSC_SUCCESS); 179 } 180 181 static PetscInt ZLayoutElementsOnRank(const ZLayout *layout, PetscMPIInt rank) 182 { 183 PetscInt remote_elem = 0; 184 for (ZCode rz = layout->zstarts[rank]; rz < layout->zstarts[rank + 1]; rz++) { 185 Ijk loc = ZCodeSplit(rz); 186 if (IjkActive(layout->eextent, loc)) remote_elem++; 187 else rz += ZStepOct(rz); 188 } 189 return remote_elem; 190 } 191 192 static PetscInt ZCodeFind(ZCode key, PetscInt n, const ZCode X[]) 193 { 194 PetscInt lo = 0, hi = n; 195 196 if (n == 0) return -1; 197 while (hi - lo > 1) { 198 PetscInt mid = lo + (hi - lo) / 2; 199 if (key < X[mid]) hi = mid; 200 else lo = mid; 201 } 202 return key == X[lo] ? lo : -(lo + (key > X[lo]) + 1); 203 } 204 205 static PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(DM dm, const ZLayout *layout, const ZCode *vert_z, PetscSegBuffer per_faces[3], const PetscReal *lower, const PetscReal *upper, const DMBoundaryType *periodicity, PetscSegBuffer donor_face_closure[3], PetscSegBuffer my_donor_faces[3]) 206 { 207 MPI_Comm comm; 208 PetscInt dim, vStart, vEnd; 209 PetscMPIInt size; 210 PetscSF face_sfs[3]; 211 PetscScalar transforms[3][4][4] = {{{0}}}; 212 213 PetscFunctionBegin; 214 PetscCall(PetscObjectGetComm((PetscObject)dm, &comm)); 215 PetscCallMPI(MPI_Comm_size(comm, &size)); 216 PetscCall(DMGetDimension(dm, &dim)); 217 const PetscInt csize = PetscPowInt(2, dim - 1); 218 PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd)); 219 220 PetscInt num_directions = 0; 221 for (PetscInt direction = 0; direction < dim; direction++) { 222 PetscCount num_faces; 223 PetscInt *faces; 224 ZCode *donor_verts, *donor_minz; 225 PetscSFNode *leaf; 226 PetscCount num_multiroots = 0; 227 PetscInt pStart, pEnd; 228 PetscBool sorted; 229 PetscInt inum_faces; 230 231 if (periodicity[direction] != DM_BOUNDARY_PERIODIC) continue; 232 PetscCall(PetscSegBufferGetSize(per_faces[direction], &num_faces)); 233 PetscCall(PetscSegBufferExtractInPlace(per_faces[direction], &faces)); 234 PetscCall(PetscSegBufferExtractInPlace(donor_face_closure[direction], &donor_verts)); 235 PetscCall(PetscMalloc1(num_faces, &donor_minz)); 236 PetscCall(PetscMalloc1(num_faces, &leaf)); 237 for (PetscCount i = 0; i < num_faces; i++) { 238 ZCode minz = donor_verts[i * csize]; 239 240 for (PetscInt j = 1; j < csize; j++) minz = PetscMin(minz, donor_verts[i * csize + j]); 241 donor_minz[i] = minz; 242 } 243 PetscCall(PetscIntCast(num_faces, &inum_faces)); 244 PetscCall(PetscSortedInt64(inum_faces, (const PetscInt64 *)donor_minz, &sorted)); 245 // If a donor vertex were chosen to broker multiple faces, we would have a logic error. 246 // Checking for sorting is a cheap check that there are no duplicates. 247 PetscCheck(sorted, PETSC_COMM_SELF, PETSC_ERR_PLIB, "minz not sorted; possible duplicates not checked"); 248 for (PetscCount i = 0; i < num_faces;) { 249 ZCode z = donor_minz[i]; 250 PetscMPIInt remote_rank = (PetscMPIInt)ZCodeFind(z, size + 1, layout->zstarts), remote_count = 0; 251 252 if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1; 253 // Process all the vertices on this rank 254 for (ZCode rz = layout->zstarts[remote_rank]; rz < layout->zstarts[remote_rank + 1]; rz++) { 255 Ijk loc = ZCodeSplit(rz); 256 257 if (rz == z) { 258 leaf[i].rank = remote_rank; 259 leaf[i].index = remote_count; 260 i++; 261 if (i == num_faces) break; 262 z = donor_minz[i]; 263 } 264 if (IjkActive(layout->vextent, loc)) remote_count++; 265 } 266 } 267 PetscCall(PetscFree(donor_minz)); 268 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &face_sfs[num_directions])); 269 PetscCall(PetscSFSetGraph(face_sfs[num_directions], vEnd - vStart, inum_faces, NULL, PETSC_USE_POINTER, leaf, PETSC_USE_POINTER)); 270 const PetscInt *my_donor_degree; 271 PetscCall(PetscSFComputeDegreeBegin(face_sfs[num_directions], &my_donor_degree)); 272 PetscCall(PetscSFComputeDegreeEnd(face_sfs[num_directions], &my_donor_degree)); 273 274 for (PetscInt i = 0; i < vEnd - vStart; i++) { 275 num_multiroots += my_donor_degree[i]; 276 if (my_donor_degree[i] == 0) continue; 277 PetscAssert(my_donor_degree[i] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex has multiple faces"); 278 } 279 PetscInt *my_donors, *donor_indices, *my_donor_indices; 280 PetscCount num_my_donors; 281 282 PetscCall(PetscSegBufferGetSize(my_donor_faces[direction], &num_my_donors)); 283 PetscCheck(num_my_donors == num_multiroots, PETSC_COMM_SELF, PETSC_ERR_SUP, "Donor request (%" PetscCount_FMT ") does not match expected donors (%" PetscCount_FMT ")", num_my_donors, num_multiroots); 284 PetscCall(PetscSegBufferExtractInPlace(my_donor_faces[direction], &my_donors)); 285 PetscCall(PetscMalloc1(vEnd - vStart, &my_donor_indices)); 286 for (PetscCount i = 0; i < num_my_donors; i++) { 287 PetscInt f = my_donors[i]; 288 PetscInt num_points, *points = NULL, minv = PETSC_INT_MAX; 289 290 PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points)); 291 for (PetscInt j = 0; j < num_points; j++) { 292 PetscInt p = points[2 * j]; 293 if (p < vStart || vEnd <= p) continue; 294 minv = PetscMin(minv, p); 295 } 296 PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points)); 297 PetscAssert(my_donor_degree[minv - vStart] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex not requested"); 298 my_donor_indices[minv - vStart] = f; 299 } 300 PetscCall(PetscMalloc1(num_faces, &donor_indices)); 301 PetscCall(PetscSFBcastBegin(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE)); 302 PetscCall(PetscSFBcastEnd(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE)); 303 PetscCall(PetscFree(my_donor_indices)); 304 // Modify our leafs so they point to donor faces instead of donor minz. Additionally, give them indices as faces. 305 for (PetscCount i = 0; i < num_faces; i++) leaf[i].index = donor_indices[i]; 306 PetscCall(PetscFree(donor_indices)); 307 PetscCall(DMPlexGetChart(dm, &pStart, &pEnd)); 308 PetscCall(PetscSFSetGraph(face_sfs[num_directions], pEnd - pStart, inum_faces, faces, PETSC_COPY_VALUES, leaf, PETSC_OWN_POINTER)); 309 { 310 char face_sf_name[PETSC_MAX_PATH_LEN]; 311 PetscCall(PetscSNPrintf(face_sf_name, sizeof face_sf_name, "Z-order Isoperiodic Faces #%" PetscInt_FMT, num_directions)); 312 PetscCall(PetscObjectSetName((PetscObject)face_sfs[num_directions], face_sf_name)); 313 } 314 315 transforms[num_directions][0][0] = 1; 316 transforms[num_directions][1][1] = 1; 317 transforms[num_directions][2][2] = 1; 318 transforms[num_directions][3][3] = 1; 319 transforms[num_directions][direction][3] = upper[direction] - lower[direction]; 320 num_directions++; 321 } 322 323 PetscCall(DMPlexSetIsoperiodicFaceSF(dm, num_directions, face_sfs)); 324 PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, num_directions, (PetscScalar *)transforms)); 325 326 for (PetscInt i = 0; i < num_directions; i++) PetscCall(PetscSFDestroy(&face_sfs[i])); 327 PetscFunctionReturn(PETSC_SUCCESS); 328 } 329 330 // This is a DMGlobalToLocalHook that applies the affine offsets. When extended for rotated periodicity, it'll need to 331 // apply a rotatonal transform and similar operations will be needed for fields (e.g., to rotate a velocity vector). 332 // We use this crude approach here so we don't have to write new GPU kernels yet. 333 static PetscErrorCode DMCoordAddPeriodicOffsets_Private(DM dm, Vec g, InsertMode mode, Vec l, void *ctx) 334 { 335 PetscFunctionBegin; 336 // These `VecScatter`s should be merged to improve efficiency; the scatters cannot be overlapped. 337 for (PetscInt i = 0; i < dm->periodic.num_affines; i++) { 338 PetscCall(VecScatterBegin(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD)); 339 PetscCall(VecScatterEnd(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD)); 340 } 341 PetscFunctionReturn(PETSC_SUCCESS); 342 } 343 344 // Start with an SF for a positive depth (e.g., faces) and create a new SF for matched closure. The caller must ensure 345 // that both the donor (root) face and the periodic (leaf) face have consistent orientation, meaning that their closures 346 // are isomorphic. It may be useful/necessary for this restriction to be loosened. 347 // 348 // Output Arguments: 349 // 350 // + closure_sf - augmented point SF (see `DMGetPointSF()`) that includes the faces and all points in its closure. This 351 // can be used to create a global section and section SF. 352 // - is_points - array of index sets for just the points in the closure of `face_sf`. These may be used to apply an affine 353 // transformation to periodic dofs; see DMPeriodicCoordinateSetUp_Internal(). 354 // 355 static PetscErrorCode DMPlexCreateIsoperiodicPointSF_Private(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs, PetscSF *closure_sf, IS **is_points) 356 { 357 MPI_Comm comm; 358 PetscMPIInt rank; 359 PetscSF point_sf; 360 PetscInt nroots, nleaves; 361 const PetscInt *filocal; 362 const PetscSFNode *firemote; 363 364 PetscFunctionBegin; 365 PetscCall(PetscObjectGetComm((PetscObject)dm, &comm)); 366 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 367 PetscCall(DMGetPointSF(dm, &point_sf)); // Point SF has remote points 368 PetscCall(PetscMalloc1(num_face_sfs, is_points)); 369 370 for (PetscInt f = 0; f < num_face_sfs; f++) { 371 PetscSF face_sf = face_sfs[f]; 372 PetscInt *rootdata, *leafdata; 373 374 PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, &firemote)); 375 PetscCall(PetscCalloc2(2 * nroots, &rootdata, 2 * nroots, &leafdata)); 376 for (PetscInt i = 0; i < nleaves; i++) { 377 PetscInt point = filocal[i], cl_size, *closure = NULL; 378 PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure)); 379 leafdata[point] = cl_size - 1; 380 PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure)); 381 } 382 PetscCall(PetscSFReduceBegin(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM)); 383 PetscCall(PetscSFReduceEnd(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM)); 384 385 PetscInt root_offset = 0; 386 for (PetscInt p = 0; p < nroots; p++) { 387 const PetscInt *donor_dof = rootdata + nroots; 388 if (donor_dof[p] == 0) { 389 rootdata[2 * p] = -1; 390 rootdata[2 * p + 1] = -1; 391 continue; 392 } 393 PetscInt cl_size; 394 PetscInt *closure = NULL; 395 PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure)); 396 // cl_size - 1 = points not including self 397 PetscAssert(donor_dof[p] == cl_size - 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Reduced leaf cone sizes do not match root cone sizes"); 398 rootdata[2 * p] = root_offset; 399 rootdata[2 * p + 1] = cl_size - 1; 400 root_offset += cl_size - 1; 401 PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure)); 402 } 403 PetscCall(PetscSFBcastBegin(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE)); 404 PetscCall(PetscSFBcastEnd(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE)); 405 // Count how many leaves we need to communicate the closures 406 PetscInt leaf_offset = 0; 407 for (PetscInt i = 0; i < nleaves; i++) { 408 PetscInt point = filocal[i]; 409 if (leafdata[2 * point + 1] < 0) continue; 410 leaf_offset += leafdata[2 * point + 1]; 411 } 412 413 PetscSFNode *closure_leaf; 414 PetscCall(PetscMalloc1(leaf_offset, &closure_leaf)); 415 leaf_offset = 0; 416 for (PetscInt i = 0; i < nleaves; i++) { 417 PetscInt point = filocal[i]; 418 PetscInt cl_size = leafdata[2 * point + 1]; 419 if (cl_size < 0) continue; 420 for (PetscInt j = 0; j < cl_size; j++) { 421 closure_leaf[leaf_offset].rank = firemote[i].rank; 422 closure_leaf[leaf_offset].index = leafdata[2 * point] + j; 423 leaf_offset++; 424 } 425 } 426 427 PetscSF sf_closure; 428 PetscCall(PetscSFCreate(comm, &sf_closure)); 429 PetscCall(PetscSFSetGraph(sf_closure, root_offset, leaf_offset, NULL, PETSC_USE_POINTER, closure_leaf, PETSC_OWN_POINTER)); 430 431 PetscSFNode *leaf_donor_closure; 432 { // Pack root buffer with owner for every point in the root cones 433 PetscSFNode *donor_closure; 434 const PetscInt *pilocal; 435 const PetscSFNode *piremote; 436 PetscInt npoints; 437 438 PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, &pilocal, &piremote)); 439 PetscCall(PetscCalloc1(root_offset, &donor_closure)); 440 root_offset = 0; 441 for (PetscInt p = 0; p < nroots; p++) { 442 if (rootdata[2 * p] < 0) continue; 443 PetscInt cl_size; 444 PetscInt *closure = NULL; 445 PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure)); 446 for (PetscInt j = 1; j < cl_size; j++) { 447 PetscInt c = closure[2 * j]; 448 if (pilocal) { 449 PetscInt found = -1; 450 if (npoints > 0) PetscCall(PetscFindInt(c, npoints, pilocal, &found)); 451 if (found >= 0) { 452 donor_closure[root_offset++] = piremote[found]; 453 continue; 454 } 455 } 456 // we own c 457 donor_closure[root_offset].rank = rank; 458 donor_closure[root_offset].index = c; 459 root_offset++; 460 } 461 PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure)); 462 } 463 464 PetscCall(PetscMalloc1(leaf_offset, &leaf_donor_closure)); 465 PetscCall(PetscSFBcastBegin(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE)); 466 PetscCall(PetscSFBcastEnd(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE)); 467 PetscCall(PetscSFDestroy(&sf_closure)); 468 PetscCall(PetscFree(donor_closure)); 469 } 470 471 PetscSFNode *new_iremote; 472 PetscCall(PetscCalloc1(nroots, &new_iremote)); 473 for (PetscInt i = 0; i < nroots; i++) new_iremote[i].rank = -1; 474 // Walk leaves and match vertices 475 leaf_offset = 0; 476 for (PetscInt i = 0; i < nleaves; i++) { 477 PetscInt point = filocal[i], cl_size; 478 PetscInt *closure = NULL; 479 PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure)); 480 for (PetscInt j = 1; j < cl_size; j++) { // TODO: should we send donor edge orientations so we can flip for consistency? 481 PetscInt c = closure[2 * j]; 482 PetscSFNode lc = leaf_donor_closure[leaf_offset]; 483 // printf("[%d] face %d.%d: %d ?-- (%d,%d)\n", rank, point, j, c, lc.rank, lc.index); 484 if (new_iremote[c].rank == -1) { 485 new_iremote[c] = lc; 486 } else PetscCheck(new_iremote[c].rank == lc.rank && new_iremote[c].index == lc.index, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Mismatched cone ordering between faces"); 487 leaf_offset++; 488 } 489 PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure)); 490 } 491 PetscCall(PetscFree(leaf_donor_closure)); 492 493 // Include face points in closure SF 494 for (PetscInt i = 0; i < nleaves; i++) new_iremote[filocal[i]] = firemote[i]; 495 // consolidate leaves 496 PetscInt num_new_leaves = 0; 497 for (PetscInt i = 0; i < nroots; i++) { 498 if (new_iremote[i].rank == -1) continue; 499 new_iremote[num_new_leaves] = new_iremote[i]; 500 leafdata[num_new_leaves] = i; 501 num_new_leaves++; 502 } 503 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, num_new_leaves, leafdata, PETSC_COPY_VALUES, &(*is_points)[f])); 504 505 PetscSF csf; 506 PetscCall(PetscSFCreate(comm, &csf)); 507 PetscCall(PetscSFSetGraph(csf, nroots, num_new_leaves, leafdata, PETSC_COPY_VALUES, new_iremote, PETSC_COPY_VALUES)); 508 PetscCall(PetscFree(new_iremote)); // copy and delete because new_iremote is longer than it needs to be 509 PetscCall(PetscFree2(rootdata, leafdata)); 510 511 PetscInt npoints; 512 PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, NULL, NULL)); 513 if (npoints < 0) { // empty point_sf 514 *closure_sf = csf; 515 } else { 516 PetscCall(PetscSFMerge(point_sf, csf, closure_sf)); 517 PetscCall(PetscSFDestroy(&csf)); 518 } 519 if (f > 0) PetscCall(PetscSFDestroy(&point_sf)); // Only destroy if point_sf is from previous calls to PetscSFMerge 520 point_sf = *closure_sf; // Use combined point + isoperiodic SF to define point ownership for further face_sf 521 } 522 PetscCall(PetscObjectSetName((PetscObject)*closure_sf, "Composed Periodic Points")); 523 PetscFunctionReturn(PETSC_SUCCESS); 524 } 525 526 static PetscErrorCode DMGetIsoperiodicPointSF_Plex(DM dm, PetscSF *sf) 527 { 528 DM_Plex *plex = (DM_Plex *)dm->data; 529 530 PetscFunctionBegin; 531 if (!plex->periodic.composed_sf) PetscCall(DMPlexCreateIsoperiodicPointSF_Private(dm, plex->periodic.num_face_sfs, plex->periodic.face_sfs, &plex->periodic.composed_sf, &plex->periodic.periodic_points)); 532 if (sf) *sf = plex->periodic.composed_sf; 533 PetscFunctionReturn(PETSC_SUCCESS); 534 } 535 536 PetscErrorCode DMPlexMigrateIsoperiodicFaceSF_Internal(DM old_dm, DM dm, PetscSF sf_migration) 537 { 538 DM_Plex *plex = (DM_Plex *)old_dm->data; 539 PetscSF sf_point, *new_face_sfs; 540 PetscMPIInt rank; 541 542 PetscFunctionBegin; 543 if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS); 544 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank)); 545 PetscCall(DMGetPointSF(dm, &sf_point)); 546 PetscCall(PetscMalloc1(plex->periodic.num_face_sfs, &new_face_sfs)); 547 548 for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) { 549 PetscInt old_npoints, new_npoints, old_nleaf, new_nleaf, point_nleaf; 550 PetscSFNode *new_leafdata, *rootdata, *leafdata; 551 const PetscInt *old_local, *point_local; 552 const PetscSFNode *old_remote, *point_remote; 553 554 PetscCall(PetscSFGetGraph(plex->periodic.face_sfs[f], &old_npoints, &old_nleaf, &old_local, &old_remote)); 555 PetscCall(PetscSFGetGraph(sf_migration, NULL, &new_nleaf, NULL, NULL)); 556 PetscCall(PetscSFGetGraph(sf_point, &new_npoints, &point_nleaf, &point_local, &point_remote)); 557 PetscAssert(new_nleaf == new_npoints, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected migration leaf space to match new point root space"); 558 PetscCall(PetscMalloc3(old_npoints, &rootdata, old_npoints, &leafdata, new_npoints, &new_leafdata)); 559 560 // Fill new_leafdata with new owners of all points 561 for (PetscInt i = 0; i < new_npoints; i++) { 562 new_leafdata[i].rank = rank; 563 new_leafdata[i].index = i; 564 } 565 for (PetscInt i = 0; i < point_nleaf; i++) { 566 PetscInt j = point_local[i]; 567 new_leafdata[j] = point_remote[i]; 568 } 569 // REPLACE is okay because every leaf agrees about the new owners 570 PetscCall(PetscSFReduceBegin(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE)); 571 PetscCall(PetscSFReduceEnd(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE)); 572 // rootdata now contains the new owners 573 574 // Send to leaves of old space 575 for (PetscInt i = 0; i < old_npoints; i++) { 576 leafdata[i].rank = -1; 577 leafdata[i].index = -1; 578 } 579 PetscCall(PetscSFBcastBegin(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE)); 580 PetscCall(PetscSFBcastEnd(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE)); 581 582 // Send to new leaf space 583 PetscCall(PetscSFBcastBegin(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE)); 584 PetscCall(PetscSFBcastEnd(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE)); 585 586 PetscInt nface = 0, *new_local; 587 PetscSFNode *new_remote; 588 for (PetscInt i = 0; i < new_npoints; i++) nface += (new_leafdata[i].rank >= 0); 589 PetscCall(PetscMalloc1(nface, &new_local)); 590 PetscCall(PetscMalloc1(nface, &new_remote)); 591 nface = 0; 592 for (PetscInt i = 0; i < new_npoints; i++) { 593 if (new_leafdata[i].rank == -1) continue; 594 new_local[nface] = i; 595 new_remote[nface] = new_leafdata[i]; 596 nface++; 597 } 598 PetscCall(PetscFree3(rootdata, leafdata, new_leafdata)); 599 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &new_face_sfs[f])); 600 PetscCall(PetscSFSetGraph(new_face_sfs[f], new_npoints, nface, new_local, PETSC_OWN_POINTER, new_remote, PETSC_OWN_POINTER)); 601 { 602 char new_face_sf_name[PETSC_MAX_PATH_LEN]; 603 PetscCall(PetscSNPrintf(new_face_sf_name, sizeof new_face_sf_name, "Migrated Isoperiodic Faces #%" PetscInt_FMT, f)); 604 PetscCall(PetscObjectSetName((PetscObject)new_face_sfs[f], new_face_sf_name)); 605 } 606 } 607 608 PetscCall(DMPlexSetIsoperiodicFaceSF(dm, plex->periodic.num_face_sfs, new_face_sfs)); 609 PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, plex->periodic.num_face_sfs, (PetscScalar *)plex->periodic.transform)); 610 for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) PetscCall(PetscSFDestroy(&new_face_sfs[f])); 611 PetscCall(PetscFree(new_face_sfs)); 612 PetscFunctionReturn(PETSC_SUCCESS); 613 } 614 615 PetscErrorCode DMPeriodicCoordinateSetUp_Internal(DM dm) 616 { 617 DM_Plex *plex = (DM_Plex *)dm->data; 618 PetscCount count; 619 IS isdof; 620 PetscInt dim; 621 622 PetscFunctionBegin; 623 if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS); 624 PetscCall(DMGetIsoperiodicPointSF_Plex(dm, NULL)); 625 PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex)); 626 627 PetscCall(DMGetDimension(dm, &dim)); 628 dm->periodic.num_affines = plex->periodic.num_face_sfs; 629 PetscCall(PetscMalloc2(dm->periodic.num_affines, &dm->periodic.affine_to_local, dm->periodic.num_affines, &dm->periodic.affine)); 630 631 for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) { 632 PetscInt npoints, vsize, isize; 633 const PetscInt *points; 634 IS is = plex->periodic.periodic_points[f]; 635 PetscSegBuffer seg; 636 PetscSection section; 637 PetscInt *ind; 638 Vec L, P; 639 VecType vec_type; 640 VecScatter scatter; 641 PetscScalar *x; 642 643 PetscCall(DMGetLocalSection(dm, §ion)); 644 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 32, &seg)); 645 PetscCall(ISGetSize(is, &npoints)); 646 PetscCall(ISGetIndices(is, &points)); 647 for (PetscInt i = 0; i < npoints; i++) { 648 PetscInt point = points[i], off, dof; 649 650 PetscCall(PetscSectionGetOffset(section, point, &off)); 651 PetscCall(PetscSectionGetDof(section, point, &dof)); 652 PetscAssert(dof % dim == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected dof %" PetscInt_FMT " not divisible by dimension %" PetscInt_FMT, dof, dim); 653 for (PetscInt j = 0; j < dof / dim; j++) { 654 PetscInt *slot; 655 656 PetscCall(PetscSegBufferGetInts(seg, 1, &slot)); 657 *slot = off / dim + j; 658 } 659 } 660 PetscCall(PetscSegBufferGetSize(seg, &count)); 661 PetscCall(PetscSegBufferExtractAlloc(seg, &ind)); 662 PetscCall(PetscSegBufferDestroy(&seg)); 663 PetscCall(PetscIntCast(count, &isize)); 664 PetscCall(ISCreateBlock(PETSC_COMM_SELF, dim, isize, ind, PETSC_OWN_POINTER, &isdof)); 665 666 PetscCall(PetscIntCast(count * dim, &vsize)); 667 PetscCall(DMGetLocalVector(dm, &L)); 668 PetscCall(VecCreate(PETSC_COMM_SELF, &P)); 669 PetscCall(VecSetSizes(P, vsize, vsize)); 670 PetscCall(VecGetType(L, &vec_type)); 671 PetscCall(VecSetType(P, vec_type)); 672 PetscCall(VecScatterCreate(P, NULL, L, isdof, &scatter)); 673 PetscCall(DMRestoreLocalVector(dm, &L)); 674 PetscCall(ISDestroy(&isdof)); 675 676 PetscCall(VecGetArrayWrite(P, &x)); 677 for (PetscCount i = 0; i < count; i++) { 678 for (PetscInt j = 0; j < dim; j++) x[i * dim + j] = plex->periodic.transform[f][j][3]; 679 } 680 PetscCall(VecRestoreArrayWrite(P, &x)); 681 682 dm->periodic.affine_to_local[f] = scatter; 683 dm->periodic.affine[f] = P; 684 } 685 PetscCall(DMGlobalToLocalHookAdd(dm, NULL, DMCoordAddPeriodicOffsets_Private, NULL)); 686 PetscFunctionReturn(PETSC_SUCCESS); 687 } 688 689 // We'll just orient all the edges, though only periodic boundary edges need orientation 690 static PetscErrorCode DMPlexOrientPositiveEdges_Private(DM dm) 691 { 692 PetscInt dim, eStart, eEnd; 693 694 PetscFunctionBegin; 695 PetscCall(DMGetDimension(dm, &dim)); 696 if (dim < 3) PetscFunctionReturn(PETSC_SUCCESS); // not necessary 697 PetscCall(DMPlexGetDepthStratum(dm, 1, &eStart, &eEnd)); 698 for (PetscInt e = eStart; e < eEnd; e++) { 699 const PetscInt *cone; 700 PetscCall(DMPlexGetCone(dm, e, &cone)); 701 if (cone[0] > cone[1]) PetscCall(DMPlexOrientPoint(dm, e, -1)); 702 } 703 PetscFunctionReturn(PETSC_SUCCESS); 704 } 705 706 PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Internal(DM dm, PetscInt dim, const PetscInt faces[], const PetscReal lower[], const PetscReal upper[], const DMBoundaryType periodicity[], PetscBool interpolate) 707 { 708 PetscInt eextent[3] = {1, 1, 1}, vextent[3] = {1, 1, 1}; 709 const Ijk closure_1[] = { 710 {0, 0, 0}, 711 {1, 0, 0}, 712 }; 713 const Ijk closure_2[] = { 714 {0, 0, 0}, 715 {1, 0, 0}, 716 {1, 1, 0}, 717 {0, 1, 0}, 718 }; 719 const Ijk closure_3[] = { 720 {0, 0, 0}, 721 {0, 1, 0}, 722 {1, 1, 0}, 723 {1, 0, 0}, 724 {0, 0, 1}, 725 {1, 0, 1}, 726 {1, 1, 1}, 727 {0, 1, 1}, 728 }; 729 const Ijk *const closure_dim[] = {NULL, closure_1, closure_2, closure_3}; 730 // This must be kept consistent with DMPlexCreateCubeMesh_Internal 731 const PetscInt face_marker_1[] = {1, 2}; 732 const PetscInt face_marker_2[] = {4, 2, 1, 3}; 733 const PetscInt face_marker_3[] = {6, 5, 3, 4, 1, 2}; 734 const PetscInt *const face_marker_dim[] = {NULL, face_marker_1, face_marker_2, face_marker_3}; 735 // Orient faces so the normal is in the positive axis and the first vertex is the one closest to zero. 736 // These orientations can be determined by examining cones of a reference quad and hex element. 737 const PetscInt face_orient_1[] = {0, 0}; 738 const PetscInt face_orient_2[] = {-1, 0, 0, -1}; 739 const PetscInt face_orient_3[] = {-2, 0, -2, 1, -2, 0}; 740 const PetscInt *const face_orient_dim[] = {NULL, face_orient_1, face_orient_2, face_orient_3}; 741 742 PetscFunctionBegin; 743 PetscCall(PetscLogEventBegin(DMPLEX_CreateBoxSFC, dm, 0, 0, 0)); 744 PetscAssertPointer(dm, 1); 745 PetscValidLogicalCollectiveInt(dm, dim, 2); 746 PetscCall(DMSetDimension(dm, dim)); 747 PetscMPIInt rank, size; 748 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size)); 749 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank)); 750 for (PetscInt i = 0; i < dim; i++) { 751 eextent[i] = faces[i]; 752 vextent[i] = faces[i] + 1; 753 } 754 ZLayout layout; 755 PetscCall(ZLayoutCreate(size, eextent, vextent, &layout)); 756 PetscZSet vset; // set of all vertices in the closure of the owned elements 757 PetscCall(PetscZSetCreate(&vset)); 758 PetscInt local_elems = 0; 759 for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) { 760 Ijk loc = ZCodeSplit(z); 761 if (IjkActive(layout.vextent, loc)) PetscCall(PetscZSetAdd(vset, z)); 762 else { 763 z += ZStepOct(z); 764 continue; 765 } 766 if (IjkActive(layout.eextent, loc)) { 767 local_elems++; 768 // Add all neighboring vertices to set 769 for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) { 770 Ijk inc = closure_dim[dim][n]; 771 Ijk nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k}; 772 ZCode v = ZEncode(nloc); 773 PetscCall(PetscZSetAdd(vset, v)); 774 } 775 } 776 } 777 PetscInt local_verts, off = 0; 778 ZCode *vert_z; 779 PetscCall(PetscZSetGetSize(vset, &local_verts)); 780 PetscCall(PetscMalloc1(local_verts, &vert_z)); 781 PetscCall(PetscZSetGetElems(vset, &off, vert_z)); 782 PetscCall(PetscZSetDestroy(&vset)); 783 // ZCode is unsigned for bitwise convenience, but highest bit should never be set, so can interpret as signed 784 PetscCall(PetscSortInt64(local_verts, (PetscInt64 *)vert_z)); 785 786 PetscCall(DMPlexSetChart(dm, 0, local_elems + local_verts)); 787 for (PetscInt e = 0; e < local_elems; e++) PetscCall(DMPlexSetConeSize(dm, e, PetscPowInt(2, dim))); 788 PetscCall(DMSetUp(dm)); 789 { 790 PetscInt e = 0; 791 for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) { 792 Ijk loc = ZCodeSplit(z); 793 if (!IjkActive(layout.eextent, loc)) { 794 z += ZStepOct(z); 795 continue; 796 } 797 PetscInt cone[8], orient[8] = {0}; 798 for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) { 799 Ijk inc = closure_dim[dim][n]; 800 Ijk nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k}; 801 ZCode v = ZEncode(nloc); 802 PetscInt ci = ZCodeFind(v, local_verts, vert_z); 803 PetscAssert(ci >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Did not find neighbor vertex in set"); 804 cone[n] = local_elems + ci; 805 } 806 PetscCall(DMPlexSetCone(dm, e, cone)); 807 PetscCall(DMPlexSetConeOrientation(dm, e, orient)); 808 e++; 809 } 810 } 811 812 PetscCall(DMPlexSymmetrize(dm)); 813 PetscCall(DMPlexStratify(dm)); 814 815 { // Create point SF 816 PetscSF sf; 817 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &sf)); 818 PetscInt owned_verts = ZCodeFind(layout.zstarts[rank + 1], local_verts, vert_z); 819 if (owned_verts < 0) owned_verts = -(owned_verts + 1); // We don't care whether the key was found 820 PetscInt num_ghosts = local_verts - owned_verts; // Due to sorting, owned vertices always come first 821 PetscInt *local_ghosts; 822 PetscSFNode *ghosts; 823 PetscCall(PetscMalloc1(num_ghosts, &local_ghosts)); 824 PetscCall(PetscMalloc1(num_ghosts, &ghosts)); 825 for (PetscInt i = 0; i < num_ghosts;) { 826 ZCode z = vert_z[owned_verts + i]; 827 PetscMPIInt remote_rank = (PetscMPIInt)ZCodeFind(z, size + 1, layout.zstarts), remote_count = 0; 828 if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1; 829 // We have a new remote rank; find all the ghost indices (which are contiguous in vert_z) 830 831 // Count the elements on remote_rank 832 PetscInt remote_elem = ZLayoutElementsOnRank(&layout, remote_rank); 833 834 // Traverse vertices and make ghost links 835 for (ZCode rz = layout.zstarts[remote_rank]; rz < layout.zstarts[remote_rank + 1]; rz++) { 836 Ijk loc = ZCodeSplit(rz); 837 if (rz == z) { 838 local_ghosts[i] = local_elems + owned_verts + i; 839 ghosts[i].rank = remote_rank; 840 ghosts[i].index = remote_elem + remote_count; 841 i++; 842 if (i == num_ghosts) break; 843 z = vert_z[owned_verts + i]; 844 } 845 if (IjkActive(layout.vextent, loc)) remote_count++; 846 else rz += ZStepOct(rz); 847 } 848 } 849 PetscCall(PetscSFSetGraph(sf, local_elems + local_verts, num_ghosts, local_ghosts, PETSC_OWN_POINTER, ghosts, PETSC_OWN_POINTER)); 850 PetscCall(PetscObjectSetName((PetscObject)sf, "SFC Point SF")); 851 PetscCall(DMSetPointSF(dm, sf)); 852 PetscCall(PetscSFDestroy(&sf)); 853 } 854 { 855 Vec coordinates; 856 PetscScalar *coords; 857 PetscSection coord_section; 858 PetscInt coord_size; 859 PetscCall(DMGetCoordinateSection(dm, &coord_section)); 860 PetscCall(PetscSectionSetNumFields(coord_section, 1)); 861 PetscCall(PetscSectionSetFieldComponents(coord_section, 0, dim)); 862 PetscCall(PetscSectionSetChart(coord_section, local_elems, local_elems + local_verts)); 863 for (PetscInt v = 0; v < local_verts; v++) { 864 PetscInt point = local_elems + v; 865 PetscCall(PetscSectionSetDof(coord_section, point, dim)); 866 PetscCall(PetscSectionSetFieldDof(coord_section, point, 0, dim)); 867 } 868 PetscCall(PetscSectionSetUp(coord_section)); 869 PetscCall(PetscSectionGetStorageSize(coord_section, &coord_size)); 870 PetscCall(VecCreate(PETSC_COMM_SELF, &coordinates)); 871 PetscCall(PetscObjectSetName((PetscObject)coordinates, "coordinates")); 872 PetscCall(VecSetSizes(coordinates, coord_size, PETSC_DETERMINE)); 873 PetscCall(VecSetBlockSize(coordinates, dim)); 874 PetscCall(VecSetType(coordinates, VECSTANDARD)); 875 PetscCall(VecGetArray(coordinates, &coords)); 876 for (PetscInt v = 0; v < local_verts; v++) { 877 Ijk loc = ZCodeSplit(vert_z[v]); 878 coords[v * dim + 0] = lower[0] + loc.i * (upper[0] - lower[0]) / layout.eextent.i; 879 if (dim > 1) coords[v * dim + 1] = lower[1] + loc.j * (upper[1] - lower[1]) / layout.eextent.j; 880 if (dim > 2) coords[v * dim + 2] = lower[2] + loc.k * (upper[2] - lower[2]) / layout.eextent.k; 881 } 882 PetscCall(VecRestoreArray(coordinates, &coords)); 883 PetscCall(DMSetCoordinatesLocal(dm, coordinates)); 884 PetscCall(VecDestroy(&coordinates)); 885 } 886 if (interpolate) { 887 PetscCall(DMPlexInterpolateInPlace_Internal(dm)); 888 // It's currently necessary to orient the donor and periodic edges consistently. An easy way to ensure that is ot 889 // give all edges positive orientation. Since vertices are created in Z-order, all ranks will agree about the 890 // ordering cone[0] < cone[1]. This is overkill and it would be nice to remove this preparation and make 891 // DMPlexCreateIsoperiodicClosureSF_Private() more resilient, so it fixes any inconsistent orientations. That might 892 // be needed in a general CGNS reader, for example. 893 PetscCall(DMPlexOrientPositiveEdges_Private(dm)); 894 895 DMLabel label; 896 PetscCall(DMCreateLabel(dm, "Face Sets")); 897 PetscCall(DMGetLabel(dm, "Face Sets", &label)); 898 PetscSegBuffer per_faces[3], donor_face_closure[3], my_donor_faces[3]; 899 for (PetscInt i = 0; i < 3; i++) { 900 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &per_faces[i])); 901 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &my_donor_faces[i])); 902 PetscCall(PetscSegBufferCreate(sizeof(ZCode), 64 * PetscPowInt(2, dim), &donor_face_closure[i])); 903 } 904 PetscInt fStart, fEnd, vStart, vEnd; 905 PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd)); 906 PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd)); 907 for (PetscInt f = fStart; f < fEnd; f++) { 908 PetscInt npoints, *points = NULL, num_fverts = 0, fverts[8]; 909 PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points)); 910 PetscInt bc_count[6] = {0}; 911 for (PetscInt i = 0; i < npoints; i++) { 912 PetscInt p = points[2 * i]; 913 if (p < vStart || vEnd <= p) continue; 914 fverts[num_fverts++] = p; 915 Ijk loc = ZCodeSplit(vert_z[p - vStart]); 916 // Convention here matches DMPlexCreateCubeMesh_Internal 917 bc_count[0] += loc.i == 0; 918 bc_count[1] += loc.i == layout.vextent.i - 1; 919 bc_count[2] += loc.j == 0; 920 bc_count[3] += loc.j == layout.vextent.j - 1; 921 bc_count[4] += loc.k == 0; 922 bc_count[5] += loc.k == layout.vextent.k - 1; 923 } 924 PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points)); 925 for (PetscInt bc = 0, bc_match = 0; bc < 2 * dim; bc++) { 926 if (bc_count[bc] == PetscPowInt(2, dim - 1)) { 927 PetscCall(DMPlexOrientPoint(dm, f, face_orient_dim[dim][bc])); 928 if (periodicity[bc / 2] == DM_BOUNDARY_PERIODIC) { 929 PetscInt *put; 930 if (bc % 2 == 0) { // donor face; no label 931 PetscCall(PetscSegBufferGet(my_donor_faces[bc / 2], 1, &put)); 932 *put = f; 933 } else { // periodic face 934 PetscCall(PetscSegBufferGet(per_faces[bc / 2], 1, &put)); 935 *put = f; 936 ZCode *zput; 937 PetscCall(PetscSegBufferGet(donor_face_closure[bc / 2], num_fverts, &zput)); 938 for (PetscInt i = 0; i < num_fverts; i++) { 939 Ijk loc = ZCodeSplit(vert_z[fverts[i] - vStart]); 940 switch (bc / 2) { 941 case 0: 942 loc.i = 0; 943 break; 944 case 1: 945 loc.j = 0; 946 break; 947 case 2: 948 loc.k = 0; 949 break; 950 } 951 *zput++ = ZEncode(loc); 952 } 953 } 954 continue; 955 } 956 PetscAssert(bc_match == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Face matches multiple face sets"); 957 PetscCall(DMLabelSetValue(label, f, face_marker_dim[dim][bc])); 958 bc_match++; 959 } 960 } 961 } 962 // Ensure that the Coordinate DM has our new boundary labels 963 DM cdm; 964 PetscCall(DMGetCoordinateDM(dm, &cdm)); 965 PetscCall(DMCopyLabels(dm, cdm, PETSC_COPY_VALUES, PETSC_FALSE, DM_COPY_LABELS_FAIL)); 966 if (periodicity[0] == DM_BOUNDARY_PERIODIC || (dim > 1 && periodicity[1] == DM_BOUNDARY_PERIODIC) || (dim > 2 && periodicity[2] == DM_BOUNDARY_PERIODIC)) { 967 PetscCall(DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(dm, &layout, vert_z, per_faces, lower, upper, periodicity, donor_face_closure, my_donor_faces)); 968 } 969 for (PetscInt i = 0; i < 3; i++) { 970 PetscCall(PetscSegBufferDestroy(&per_faces[i])); 971 PetscCall(PetscSegBufferDestroy(&donor_face_closure[i])); 972 PetscCall(PetscSegBufferDestroy(&my_donor_faces[i])); 973 } 974 } 975 PetscCall(PetscFree(layout.zstarts)); 976 PetscCall(PetscFree(vert_z)); 977 PetscCall(PetscLogEventEnd(DMPLEX_CreateBoxSFC, dm, 0, 0, 0)); 978 PetscFunctionReturn(PETSC_SUCCESS); 979 } 980 981 /*@ 982 DMPlexSetIsoperiodicFaceSF - Express periodicity from an existing mesh 983 984 Logically Collective 985 986 Input Parameters: 987 + dm - The `DMPLEX` on which to set periodicity 988 . num_face_sfs - Number of `PetscSF`s in `face_sfs` 989 - face_sfs - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face. 990 991 Level: advanced 992 993 Note: 994 One can use `-dm_plex_shape zbox` to use this mode of periodicity, wherein the periodic points are distinct both globally 995 and locally, but are paired when creating a global dof space. 996 997 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexGetIsoperiodicFaceSF()` 998 @*/ 999 PetscErrorCode DMPlexSetIsoperiodicFaceSF(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs) 1000 { 1001 DM_Plex *plex = (DM_Plex *)dm->data; 1002 1003 PetscFunctionBegin; 1004 PetscValidHeaderSpecific(dm, DM_CLASSID, 1); 1005 if (face_sfs) PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex)); 1006 if (face_sfs == plex->periodic.face_sfs && num_face_sfs == plex->periodic.num_face_sfs) PetscFunctionReturn(PETSC_SUCCESS); 1007 1008 for (PetscInt i = 0; i < num_face_sfs; i++) PetscCall(PetscObjectReference((PetscObject)face_sfs[i])); 1009 1010 if (plex->periodic.num_face_sfs > 0) { 1011 for (PetscInt i = 0; i < plex->periodic.num_face_sfs; i++) PetscCall(PetscSFDestroy(&plex->periodic.face_sfs[i])); 1012 PetscCall(PetscFree(plex->periodic.face_sfs)); 1013 } 1014 1015 plex->periodic.num_face_sfs = num_face_sfs; 1016 PetscCall(PetscCalloc1(num_face_sfs, &plex->periodic.face_sfs)); 1017 for (PetscInt i = 0; i < num_face_sfs; i++) plex->periodic.face_sfs[i] = face_sfs[i]; 1018 1019 DM cdm = dm->coordinates[0].dm; // Can't DMGetCoordinateDM because it automatically creates one 1020 if (cdm) { 1021 PetscCall(DMPlexSetIsoperiodicFaceSF(cdm, num_face_sfs, face_sfs)); 1022 if (face_sfs) cdm->periodic.setup = DMPeriodicCoordinateSetUp_Internal; 1023 } 1024 PetscFunctionReturn(PETSC_SUCCESS); 1025 } 1026 1027 /*@C 1028 DMPlexGetIsoperiodicFaceSF - Obtain periodicity for a mesh 1029 1030 Logically Collective 1031 1032 Input Parameter: 1033 . dm - The `DMPLEX` for which to obtain periodic relation 1034 1035 Output Parameters: 1036 + num_face_sfs - Number of `PetscSF`s in the array 1037 - face_sfs - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face. 1038 1039 Level: advanced 1040 1041 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()` 1042 @*/ 1043 PetscErrorCode DMPlexGetIsoperiodicFaceSF(DM dm, PetscInt *num_face_sfs, const PetscSF **face_sfs) 1044 { 1045 DM_Plex *plex = (DM_Plex *)dm->data; 1046 1047 PetscFunctionBegin; 1048 PetscValidHeaderSpecific(dm, DM_CLASSID, 1); 1049 *face_sfs = plex->periodic.face_sfs; 1050 *num_face_sfs = plex->periodic.num_face_sfs; 1051 PetscFunctionReturn(PETSC_SUCCESS); 1052 } 1053 1054 /*@C 1055 DMPlexSetIsoperiodicFaceTransform - set geometric transform from donor to periodic points 1056 1057 Logically Collective 1058 1059 Input Parameters: 1060 + dm - `DMPLEX` that has been configured with `DMPlexSetIsoperiodicFaceSF()` 1061 . n - Number of transforms in array 1062 - t - Array of 4x4 affine transformation basis. 1063 1064 Level: advanced 1065 1066 Notes: 1067 Affine transforms are 4x4 matrices in which the leading 3x3 block expresses a rotation (or identity for no rotation), 1068 the last column contains a translation vector, and the bottom row is all zero except the last entry, which must always 1069 be 1. This representation is common in geometric modeling and allows affine transformations to be composed using 1070 simple matrix multiplication. 1071 1072 Although the interface accepts a general affine transform, only affine translation is supported at present. 1073 1074 Developer Notes: 1075 This interface should be replaced by making BasisTransform public, expanding it to support affine representations, and 1076 adding GPU implementations to apply the G2L/L2G transforms. 1077 1078 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()` 1079 @*/ 1080 PetscErrorCode DMPlexSetIsoperiodicFaceTransform(DM dm, PetscInt n, const PetscScalar t[]) 1081 { 1082 DM_Plex *plex = (DM_Plex *)dm->data; 1083 1084 PetscFunctionBegin; 1085 PetscValidHeaderSpecific(dm, DM_CLASSID, 1); 1086 PetscCheck(n == plex->periodic.num_face_sfs, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Number of transforms (%" PetscInt_FMT ") must equal number of isoperiodc face SFs (%" PetscInt_FMT ")", n, plex->periodic.num_face_sfs); 1087 1088 PetscCall(PetscMalloc1(n, &plex->periodic.transform)); 1089 for (PetscInt i = 0; i < n; i++) { 1090 for (PetscInt j = 0; j < 4; j++) { 1091 for (PetscInt k = 0; k < 4; k++) { 1092 PetscCheck(j != k || t[i * 16 + j * 4 + k] == 1., PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Rotated transforms not supported"); 1093 plex->periodic.transform[i][j][k] = t[i * 16 + j * 4 + k]; 1094 } 1095 } 1096 } 1097 PetscFunctionReturn(PETSC_SUCCESS); 1098 } 1099