1 #include <petsc/private/dmpleximpl.h> /*I "petscdmplex.h" I*/ 2 #include <petscsf.h> 3 #include <petsc/private/hashset.h> 4 5 typedef uint64_t ZCode; 6 7 PETSC_HASH_SET(ZSet, ZCode, PetscHash_UInt64, PetscHashEqual) 8 9 typedef struct { 10 PetscInt i, j, k; 11 } Ijk; 12 13 typedef struct { 14 Ijk eextent; 15 Ijk vextent; 16 PetscMPIInt comm_size; 17 ZCode *zstarts; 18 } ZLayout; 19 20 // ***** Overview of ZCode ******* 21 // The SFC uses integer indexing for each dimension and encodes them into a single integer by interleaving the bits of each index. 22 // This is known as Morton encoding, and is refered to as ZCode in this code. 23 // So for index i having bits [i2,i1,i0], and similar for indexes j and k, the ZCode (Morton number) would be: 24 // [k2,j2,i2,k1,j1,i1,k0,j0,i0] 25 // This encoding allows for easier traversal of the SFC structure (see https://en.wikipedia.org/wiki/Z-order_curve and `ZStepOct()`). 26 // `ZEncode()` is used to go from indices to ZCode, while `ZCodeSplit()` goes from ZCode back to indices. 27 28 // Decodes the leading interleaved index from a ZCode 29 // e.g. [k2,j2,i2,k1,j1,i1,k0,j0,i0] -> [i2,i1,i0] 30 // Magic numbers taken from https://stackoverflow.com/a/18528775/7564988 (translated to octal) 31 static unsigned ZCodeSplit1(ZCode z) 32 { 33 z &= 0111111111111111111111; 34 z = (z | z >> 2) & 0103030303030303030303; 35 z = (z | z >> 4) & 0100170017001700170017; 36 z = (z | z >> 8) & 0370000037700000377; 37 z = (z | z >> 16) & 0370000000000177777; 38 z = (z | z >> 32) & 07777777; 39 return (unsigned)z; 40 } 41 42 // Encodes the leading interleaved index from a ZCode 43 // e.g. [i2,i1,i0] -> [0,0,i2,0,0,i1,0,0,i0] 44 static ZCode ZEncode1(unsigned t) 45 { 46 ZCode z = t; 47 z &= 07777777; 48 z = (z | z << 32) & 0370000000000177777; 49 z = (z | z << 16) & 0370000037700000377; 50 z = (z | z << 8) & 0100170017001700170017; 51 z = (z | z << 4) & 0103030303030303030303; 52 z = (z | z << 2) & 0111111111111111111111; 53 return z; 54 } 55 56 // Decodes i j k indices from a ZCode. 57 // Uses `ZCodeSplit1()` by shifting ZCode so that the leading index is the desired one to decode 58 static Ijk ZCodeSplit(ZCode z) 59 { 60 Ijk c; 61 c.i = ZCodeSplit1(z >> 2); 62 c.j = ZCodeSplit1(z >> 1); 63 c.k = ZCodeSplit1(z >> 0); 64 return c; 65 } 66 67 // Encodes i j k indices to a ZCode. 68 // Uses `ZCodeEncode1()` by shifting resulting ZCode to the appropriate bit placement 69 static ZCode ZEncode(Ijk c) 70 { 71 ZCode z = (ZEncode1((unsigned int)c.i) << 2) | (ZEncode1((unsigned int)c.j) << 1) | ZEncode1((unsigned int)c.k); 72 return z; 73 } 74 75 static PetscBool IjkActive(Ijk extent, Ijk l) 76 { 77 if (l.i < extent.i && l.j < extent.j && l.k < extent.k) return PETSC_TRUE; 78 return PETSC_FALSE; 79 } 80 81 // If z is not the base of an octet (last 3 bits 0), return 0. 82 // 83 // If z is the base of an octet, we recursively grow to the biggest structured octet. This is typically useful when a z 84 // is outside the domain and we wish to skip a (possibly recursively large) octet to find our next interesting point. 85 static ZCode ZStepOct(ZCode z) 86 { 87 if (PetscUnlikely(z == 0)) return 0; // Infinite loop below if z == 0 88 ZCode step = 07; 89 for (; (z & step) == 0; step = (step << 3) | 07) { } 90 return step >> 3; 91 } 92 93 // Since element/vertex box extents are typically not equal powers of 2, Z codes that lie within the domain are not contiguous. 94 static PetscErrorCode ZLayoutCreate(PetscMPIInt size, const PetscInt eextent[3], const PetscInt vextent[3], ZLayout *layout) 95 { 96 PetscFunctionBegin; 97 layout->eextent.i = eextent[0]; 98 layout->eextent.j = eextent[1]; 99 layout->eextent.k = eextent[2]; 100 layout->vextent.i = vextent[0]; 101 layout->vextent.j = vextent[1]; 102 layout->vextent.k = vextent[2]; 103 layout->comm_size = size; 104 layout->zstarts = NULL; 105 PetscCall(PetscMalloc1(size + 1, &layout->zstarts)); 106 107 PetscInt total_elems = eextent[0] * eextent[1] * eextent[2]; 108 ZCode z = 0; 109 layout->zstarts[0] = 0; 110 // This loop traverses all vertices in the global domain, so is worth making fast. We use ZStepBound 111 for (PetscMPIInt r = 0; r < size; r++) { 112 PetscInt elems_needed = (total_elems / size) + (total_elems % size > r), count; 113 for (count = 0; count < elems_needed; z++) { 114 ZCode skip = ZStepOct(z); // optimistically attempt a longer step 115 for (ZCode s = skip;; s >>= 3) { 116 Ijk trial = ZCodeSplit(z + s); 117 if (IjkActive(layout->eextent, trial)) { 118 while (count + s + 1 > (ZCode)elems_needed) s >>= 3; // Shrink the octet 119 count += s + 1; 120 z += s; 121 break; 122 } 123 if (s == 0) { // the whole skip octet is inactive 124 z += skip; 125 break; 126 } 127 } 128 } 129 // Pick up any extra vertices in the Z ordering before the next rank's first owned element. 130 // 131 // This leads to poorly balanced vertices when eextent is a power of 2, since all the fringe vertices end up 132 // on the last rank. A possible solution is to balance the Z-order vertices independently from the cells, which will 133 // result in a lot of element closures being remote. We could finish marking boundary conditions, then do a round of 134 // vertex ownership smoothing (which would reorder and redistribute vertices without touching element distribution). 135 // Another would be to have an analytic ownership criteria for vertices in the fringe veextent - eextent. This would 136 // complicate the job of identifying an owner and its offset. 137 // 138 // The current recommended approach is to let `-dm_distribute 1` (default) resolve vertex ownership. This is 139 // *mandatory* with isoperiodicity (except in special cases) to remove standed vertices from local spaces. Here's 140 // the issue: 141 // 142 // Consider this partition on rank 0 (left) and rank 1. 143 // 144 // 4 -------- 5 -- 14 --10 -- 21 --11 145 // | | | 146 // 7 -- 16 -- 8 | | | 147 // | | 3 ------- 7 ------- 9 148 // | | | | 149 // 4 -------- 6 ------ 10 | | 150 // | | | 6 -- 16 -- 8 151 // | | | 152 // 3 ---11--- 5 --18-- 9 153 // 154 // The periodic face SF looks like 155 // [0] Number of roots=21, leaves=1, remote ranks=1 156 // [0] 16 <- (0,11) 157 // [1] Number of roots=22, leaves=2, remote ranks=2 158 // [1] 14 <- (0,18) 159 // [1] 21 <- (1,16) 160 // 161 // In handling face (0,16), rank 0 learns that (0,7) and (0,8) map to (0,3) and (0,5) respectively, thus we won't use 162 // the point SF links to (1,4) and (1,5). Rank 1 learns about the periodic mapping of (1,5) while handling face 163 // (1,14), but never learns that vertex (1,4) has been mapped to (0,3) by face (0,16). 164 // 165 // We can relatively easily inform vertex (1,4) of this mapping, but it stays in rank 1's local space despite not 166 // being in the closure and thus not being contributed to. This would be mostly harmless except that some viewer 167 // routines expect all local points to be somehow significant. It is not easy to analytically remove the (1,4) 168 // vertex because the point SF and isoperiodic face SF would need to be updated to account for removal of the 169 // stranded vertices. 170 for (; z <= ZEncode(layout->vextent); z++) { 171 Ijk loc = ZCodeSplit(z); 172 if (IjkActive(layout->eextent, loc)) break; 173 z += ZStepOct(z); 174 } 175 layout->zstarts[r + 1] = z; 176 } 177 layout->zstarts[size] = ZEncode(layout->vextent); 178 PetscFunctionReturn(PETSC_SUCCESS); 179 } 180 181 static PetscInt ZLayoutElementsOnRank(const ZLayout *layout, PetscMPIInt rank) 182 { 183 PetscInt remote_elem = 0; 184 for (ZCode rz = layout->zstarts[rank]; rz < layout->zstarts[rank + 1]; rz++) { 185 Ijk loc = ZCodeSplit(rz); 186 if (IjkActive(layout->eextent, loc)) remote_elem++; 187 else rz += ZStepOct(rz); 188 } 189 return remote_elem; 190 } 191 192 static PetscInt ZCodeFind(ZCode key, PetscInt n, const ZCode X[]) 193 { 194 PetscInt lo = 0, hi = n; 195 196 if (n == 0) return -1; 197 while (hi - lo > 1) { 198 PetscInt mid = lo + (hi - lo) / 2; 199 if (key < X[mid]) hi = mid; 200 else lo = mid; 201 } 202 return key == X[lo] ? lo : -(lo + (key > X[lo]) + 1); 203 } 204 205 static PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(DM dm, const ZLayout *layout, const ZCode *vert_z, PetscSegBuffer per_faces[3], const PetscReal *lower, const PetscReal *upper, const DMBoundaryType *periodicity, PetscSegBuffer donor_face_closure[3], PetscSegBuffer my_donor_faces[3]) 206 { 207 MPI_Comm comm; 208 PetscInt dim, vStart, vEnd; 209 PetscMPIInt size; 210 PetscSF face_sfs[3]; 211 PetscScalar transforms[3][4][4] = {{{0}}}; 212 213 PetscFunctionBegin; 214 PetscCall(PetscObjectGetComm((PetscObject)dm, &comm)); 215 PetscCallMPI(MPI_Comm_size(comm, &size)); 216 PetscCall(DMGetDimension(dm, &dim)); 217 const PetscInt csize = PetscPowInt(2, dim - 1); 218 PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd)); 219 220 PetscInt num_directions = 0; 221 for (PetscInt direction = 0; direction < dim; direction++) { 222 PetscCount num_faces; 223 PetscInt *faces; 224 ZCode *donor_verts, *donor_minz; 225 PetscSFNode *leaf; 226 PetscCount num_multiroots = 0; 227 PetscInt pStart, pEnd; 228 PetscBool sorted; 229 PetscInt inum_faces; 230 231 if (periodicity[direction] != DM_BOUNDARY_PERIODIC) continue; 232 PetscCall(PetscSegBufferGetSize(per_faces[direction], &num_faces)); 233 PetscCall(PetscSegBufferExtractInPlace(per_faces[direction], &faces)); 234 PetscCall(PetscSegBufferExtractInPlace(donor_face_closure[direction], &donor_verts)); 235 PetscCall(PetscMalloc1(num_faces, &donor_minz)); 236 PetscCall(PetscMalloc1(num_faces, &leaf)); 237 for (PetscCount i = 0; i < num_faces; i++) { 238 ZCode minz = donor_verts[i * csize]; 239 240 for (PetscInt j = 1; j < csize; j++) minz = PetscMin(minz, donor_verts[i * csize + j]); 241 donor_minz[i] = minz; 242 } 243 PetscCall(PetscIntCast(num_faces, &inum_faces)); 244 PetscCall(PetscSortedInt64(inum_faces, (const PetscInt64 *)donor_minz, &sorted)); 245 // If a donor vertex were chosen to broker multiple faces, we would have a logic error. 246 // Checking for sorting is a cheap check that there are no duplicates. 247 PetscCheck(sorted, PETSC_COMM_SELF, PETSC_ERR_PLIB, "minz not sorted; possible duplicates not checked"); 248 for (PetscCount i = 0; i < num_faces;) { 249 ZCode z = donor_minz[i]; 250 PetscMPIInt remote_rank, remote_count = 0; 251 252 PetscCall(PetscMPIIntCast(ZCodeFind(z, size + 1, layout->zstarts), &remote_rank)); 253 if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1; 254 // Process all the vertices on this rank 255 for (ZCode rz = layout->zstarts[remote_rank]; rz < layout->zstarts[remote_rank + 1]; rz++) { 256 Ijk loc = ZCodeSplit(rz); 257 258 if (rz == z) { 259 leaf[i].rank = remote_rank; 260 leaf[i].index = remote_count; 261 i++; 262 if (i == num_faces) break; 263 z = donor_minz[i]; 264 } 265 if (IjkActive(layout->vextent, loc)) remote_count++; 266 } 267 } 268 PetscCall(PetscFree(donor_minz)); 269 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &face_sfs[num_directions])); 270 PetscCall(PetscSFSetGraph(face_sfs[num_directions], vEnd - vStart, inum_faces, NULL, PETSC_USE_POINTER, leaf, PETSC_USE_POINTER)); 271 const PetscInt *my_donor_degree; 272 PetscCall(PetscSFComputeDegreeBegin(face_sfs[num_directions], &my_donor_degree)); 273 PetscCall(PetscSFComputeDegreeEnd(face_sfs[num_directions], &my_donor_degree)); 274 275 for (PetscInt i = 0; i < vEnd - vStart; i++) { 276 num_multiroots += my_donor_degree[i]; 277 if (my_donor_degree[i] == 0) continue; 278 PetscAssert(my_donor_degree[i] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex has multiple faces"); 279 } 280 PetscInt *my_donors, *donor_indices, *my_donor_indices; 281 PetscCount num_my_donors; 282 283 PetscCall(PetscSegBufferGetSize(my_donor_faces[direction], &num_my_donors)); 284 PetscCheck(num_my_donors == num_multiroots, PETSC_COMM_SELF, PETSC_ERR_SUP, "Donor request (%" PetscCount_FMT ") does not match expected donors (%" PetscCount_FMT ")", num_my_donors, num_multiroots); 285 PetscCall(PetscSegBufferExtractInPlace(my_donor_faces[direction], &my_donors)); 286 PetscCall(PetscMalloc1(vEnd - vStart, &my_donor_indices)); 287 for (PetscCount i = 0; i < num_my_donors; i++) { 288 PetscInt f = my_donors[i]; 289 PetscInt num_points, *points = NULL, minv = PETSC_INT_MAX; 290 291 PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points)); 292 for (PetscInt j = 0; j < num_points; j++) { 293 PetscInt p = points[2 * j]; 294 if (p < vStart || vEnd <= p) continue; 295 minv = PetscMin(minv, p); 296 } 297 PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points)); 298 PetscAssert(my_donor_degree[minv - vStart] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex not requested"); 299 my_donor_indices[minv - vStart] = f; 300 } 301 PetscCall(PetscMalloc1(num_faces, &donor_indices)); 302 PetscCall(PetscSFBcastBegin(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE)); 303 PetscCall(PetscSFBcastEnd(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE)); 304 PetscCall(PetscFree(my_donor_indices)); 305 // Modify our leafs so they point to donor faces instead of donor minz. Additionally, give them indices as faces. 306 for (PetscCount i = 0; i < num_faces; i++) leaf[i].index = donor_indices[i]; 307 PetscCall(PetscFree(donor_indices)); 308 PetscCall(DMPlexGetChart(dm, &pStart, &pEnd)); 309 PetscCall(PetscSFSetGraph(face_sfs[num_directions], pEnd - pStart, inum_faces, faces, PETSC_COPY_VALUES, leaf, PETSC_OWN_POINTER)); 310 { 311 char face_sf_name[PETSC_MAX_PATH_LEN]; 312 PetscCall(PetscSNPrintf(face_sf_name, sizeof face_sf_name, "Z-order Isoperiodic Faces #%" PetscInt_FMT, num_directions)); 313 PetscCall(PetscObjectSetName((PetscObject)face_sfs[num_directions], face_sf_name)); 314 } 315 316 transforms[num_directions][0][0] = 1; 317 transforms[num_directions][1][1] = 1; 318 transforms[num_directions][2][2] = 1; 319 transforms[num_directions][3][3] = 1; 320 transforms[num_directions][direction][3] = upper[direction] - lower[direction]; 321 num_directions++; 322 } 323 324 PetscCall(DMPlexSetIsoperiodicFaceSF(dm, num_directions, face_sfs)); 325 PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, num_directions, (PetscScalar *)transforms)); 326 327 for (PetscInt i = 0; i < num_directions; i++) PetscCall(PetscSFDestroy(&face_sfs[i])); 328 PetscFunctionReturn(PETSC_SUCCESS); 329 } 330 331 // This is a DMGlobalToLocalHook that applies the affine offsets. When extended for rotated periodicity, it'll need to 332 // apply a rotatonal transform and similar operations will be needed for fields (e.g., to rotate a velocity vector). 333 // We use this crude approach here so we don't have to write new GPU kernels yet. 334 static PetscErrorCode DMCoordAddPeriodicOffsets_Private(DM dm, Vec g, InsertMode mode, Vec l, void *ctx) 335 { 336 PetscFunctionBegin; 337 // These `VecScatter`s should be merged to improve efficiency; the scatters cannot be overlapped. 338 for (PetscInt i = 0; i < dm->periodic.num_affines; i++) { 339 PetscCall(VecScatterBegin(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD)); 340 PetscCall(VecScatterEnd(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD)); 341 } 342 PetscFunctionReturn(PETSC_SUCCESS); 343 } 344 345 // Start with an SF for a positive depth (e.g., faces) and create a new SF for matched closure. The caller must ensure 346 // that both the donor (root) face and the periodic (leaf) face have consistent orientation, meaning that their closures 347 // are isomorphic. It may be useful/necessary for this restriction to be loosened. 348 // 349 // Output Arguments: 350 // 351 // + closure_sf - augmented point SF (see `DMGetPointSF()`) that includes the faces and all points in its closure. This 352 // can be used to create a global section and section SF. 353 // - is_points - array of index sets for just the points in the closure of `face_sf`. These may be used to apply an affine 354 // transformation to periodic dofs; see DMPeriodicCoordinateSetUp_Internal(). 355 // 356 static PetscErrorCode DMPlexCreateIsoperiodicPointSF_Private(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs, PetscSF *closure_sf, IS **is_points) 357 { 358 MPI_Comm comm; 359 PetscMPIInt rank; 360 PetscSF point_sf; 361 PetscInt nroots, nleaves; 362 const PetscInt *filocal; 363 const PetscSFNode *firemote; 364 365 PetscFunctionBegin; 366 PetscCall(PetscObjectGetComm((PetscObject)dm, &comm)); 367 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 368 PetscCall(DMGetPointSF(dm, &point_sf)); // Point SF has remote points 369 PetscCall(PetscMalloc1(num_face_sfs, is_points)); 370 371 for (PetscInt f = 0; f < num_face_sfs; f++) { 372 PetscSF face_sf = face_sfs[f]; 373 PetscInt *rootdata, *leafdata; 374 375 PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, &firemote)); 376 PetscCall(PetscCalloc2(2 * nroots, &rootdata, 2 * nroots, &leafdata)); 377 for (PetscInt i = 0; i < nleaves; i++) { 378 PetscInt point = filocal[i], cl_size, *closure = NULL; 379 PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure)); 380 leafdata[point] = cl_size - 1; 381 PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure)); 382 } 383 PetscCall(PetscSFReduceBegin(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM)); 384 PetscCall(PetscSFReduceEnd(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM)); 385 386 PetscInt root_offset = 0; 387 for (PetscInt p = 0; p < nroots; p++) { 388 const PetscInt *donor_dof = rootdata + nroots; 389 if (donor_dof[p] == 0) { 390 rootdata[2 * p] = -1; 391 rootdata[2 * p + 1] = -1; 392 continue; 393 } 394 PetscInt cl_size; 395 PetscInt *closure = NULL; 396 PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure)); 397 // cl_size - 1 = points not including self 398 PetscAssert(donor_dof[p] == cl_size - 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Reduced leaf cone sizes do not match root cone sizes"); 399 rootdata[2 * p] = root_offset; 400 rootdata[2 * p + 1] = cl_size - 1; 401 root_offset += cl_size - 1; 402 PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure)); 403 } 404 PetscCall(PetscSFBcastBegin(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE)); 405 PetscCall(PetscSFBcastEnd(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE)); 406 // Count how many leaves we need to communicate the closures 407 PetscInt leaf_offset = 0; 408 for (PetscInt i = 0; i < nleaves; i++) { 409 PetscInt point = filocal[i]; 410 if (leafdata[2 * point + 1] < 0) continue; 411 leaf_offset += leafdata[2 * point + 1]; 412 } 413 414 PetscSFNode *closure_leaf; 415 PetscCall(PetscMalloc1(leaf_offset, &closure_leaf)); 416 leaf_offset = 0; 417 for (PetscInt i = 0; i < nleaves; i++) { 418 PetscInt point = filocal[i]; 419 PetscInt cl_size = leafdata[2 * point + 1]; 420 if (cl_size < 0) continue; 421 for (PetscInt j = 0; j < cl_size; j++) { 422 closure_leaf[leaf_offset].rank = firemote[i].rank; 423 closure_leaf[leaf_offset].index = leafdata[2 * point] + j; 424 leaf_offset++; 425 } 426 } 427 428 PetscSF sf_closure; 429 PetscCall(PetscSFCreate(comm, &sf_closure)); 430 PetscCall(PetscSFSetGraph(sf_closure, root_offset, leaf_offset, NULL, PETSC_USE_POINTER, closure_leaf, PETSC_OWN_POINTER)); 431 432 PetscSFNode *leaf_donor_closure; 433 { // Pack root buffer with owner for every point in the root cones 434 PetscSFNode *donor_closure; 435 const PetscInt *pilocal; 436 const PetscSFNode *piremote; 437 PetscInt npoints; 438 439 PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, &pilocal, &piremote)); 440 PetscCall(PetscCalloc1(root_offset, &donor_closure)); 441 root_offset = 0; 442 for (PetscInt p = 0; p < nroots; p++) { 443 if (rootdata[2 * p] < 0) continue; 444 PetscInt cl_size; 445 PetscInt *closure = NULL; 446 PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure)); 447 for (PetscInt j = 1; j < cl_size; j++) { 448 PetscInt c = closure[2 * j]; 449 if (pilocal) { 450 PetscInt found = -1; 451 if (npoints > 0) PetscCall(PetscFindInt(c, npoints, pilocal, &found)); 452 if (found >= 0) { 453 donor_closure[root_offset++] = piremote[found]; 454 continue; 455 } 456 } 457 // we own c 458 donor_closure[root_offset].rank = rank; 459 donor_closure[root_offset].index = c; 460 root_offset++; 461 } 462 PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure)); 463 } 464 465 PetscCall(PetscMalloc1(leaf_offset, &leaf_donor_closure)); 466 PetscCall(PetscSFBcastBegin(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE)); 467 PetscCall(PetscSFBcastEnd(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE)); 468 PetscCall(PetscSFDestroy(&sf_closure)); 469 PetscCall(PetscFree(donor_closure)); 470 } 471 472 PetscSFNode *new_iremote; 473 PetscCall(PetscCalloc1(nroots, &new_iremote)); 474 for (PetscInt i = 0; i < nroots; i++) new_iremote[i].rank = -1; 475 // Walk leaves and match vertices 476 leaf_offset = 0; 477 for (PetscInt i = 0; i < nleaves; i++) { 478 PetscInt point = filocal[i], cl_size; 479 PetscInt *closure = NULL; 480 PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure)); 481 for (PetscInt j = 1; j < cl_size; j++) { // TODO: should we send donor edge orientations so we can flip for consistency? 482 PetscInt c = closure[2 * j]; 483 PetscSFNode lc = leaf_donor_closure[leaf_offset]; 484 // printf("[%d] face %d.%d: %d ?-- (%d,%d)\n", rank, point, j, c, lc.rank, lc.index); 485 if (new_iremote[c].rank == -1) { 486 new_iremote[c] = lc; 487 } else PetscCheck(new_iremote[c].rank == lc.rank && new_iremote[c].index == lc.index, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Mismatched cone ordering between faces"); 488 leaf_offset++; 489 } 490 PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure)); 491 } 492 PetscCall(PetscFree(leaf_donor_closure)); 493 494 // Include face points in closure SF 495 for (PetscInt i = 0; i < nleaves; i++) new_iremote[filocal[i]] = firemote[i]; 496 // consolidate leaves 497 PetscInt num_new_leaves = 0; 498 for (PetscInt i = 0; i < nroots; i++) { 499 if (new_iremote[i].rank == -1) continue; 500 new_iremote[num_new_leaves] = new_iremote[i]; 501 leafdata[num_new_leaves] = i; 502 num_new_leaves++; 503 } 504 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, num_new_leaves, leafdata, PETSC_COPY_VALUES, &(*is_points)[f])); 505 506 PetscSF csf; 507 PetscCall(PetscSFCreate(comm, &csf)); 508 PetscCall(PetscSFSetGraph(csf, nroots, num_new_leaves, leafdata, PETSC_COPY_VALUES, new_iremote, PETSC_COPY_VALUES)); 509 PetscCall(PetscFree(new_iremote)); // copy and delete because new_iremote is longer than it needs to be 510 PetscCall(PetscFree2(rootdata, leafdata)); 511 512 PetscInt npoints; 513 PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, NULL, NULL)); 514 if (npoints < 0) { // empty point_sf 515 *closure_sf = csf; 516 } else { 517 PetscCall(PetscSFMerge(point_sf, csf, closure_sf)); 518 PetscCall(PetscSFDestroy(&csf)); 519 } 520 if (f > 0) PetscCall(PetscSFDestroy(&point_sf)); // Only destroy if point_sf is from previous calls to PetscSFMerge 521 point_sf = *closure_sf; // Use combined point + isoperiodic SF to define point ownership for further face_sf 522 } 523 PetscCall(PetscObjectSetName((PetscObject)*closure_sf, "Composed Periodic Points")); 524 PetscFunctionReturn(PETSC_SUCCESS); 525 } 526 527 static PetscErrorCode DMGetIsoperiodicPointSF_Plex(DM dm, PetscSF *sf) 528 { 529 DM_Plex *plex = (DM_Plex *)dm->data; 530 531 PetscFunctionBegin; 532 if (!plex->periodic.composed_sf) PetscCall(DMPlexCreateIsoperiodicPointSF_Private(dm, plex->periodic.num_face_sfs, plex->periodic.face_sfs, &plex->periodic.composed_sf, &plex->periodic.periodic_points)); 533 if (sf) *sf = plex->periodic.composed_sf; 534 PetscFunctionReturn(PETSC_SUCCESS); 535 } 536 537 PetscErrorCode DMPlexMigrateIsoperiodicFaceSF_Internal(DM old_dm, DM dm, PetscSF sf_migration) 538 { 539 DM_Plex *plex = (DM_Plex *)old_dm->data; 540 PetscSF sf_point, *new_face_sfs; 541 PetscMPIInt rank; 542 543 PetscFunctionBegin; 544 if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS); 545 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank)); 546 PetscCall(DMGetPointSF(dm, &sf_point)); 547 PetscCall(PetscMalloc1(plex->periodic.num_face_sfs, &new_face_sfs)); 548 549 for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) { 550 PetscInt old_npoints, new_npoints, old_nleaf, new_nleaf, point_nleaf; 551 PetscSFNode *new_leafdata, *rootdata, *leafdata; 552 const PetscInt *old_local, *point_local; 553 const PetscSFNode *old_remote, *point_remote; 554 555 PetscCall(PetscSFGetGraph(plex->periodic.face_sfs[f], &old_npoints, &old_nleaf, &old_local, &old_remote)); 556 PetscCall(PetscSFGetGraph(sf_migration, NULL, &new_nleaf, NULL, NULL)); 557 PetscCall(PetscSFGetGraph(sf_point, &new_npoints, &point_nleaf, &point_local, &point_remote)); 558 PetscAssert(new_nleaf == new_npoints, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected migration leaf space to match new point root space"); 559 PetscCall(PetscMalloc3(old_npoints, &rootdata, old_npoints, &leafdata, new_npoints, &new_leafdata)); 560 561 // Fill new_leafdata with new owners of all points 562 for (PetscInt i = 0; i < new_npoints; i++) { 563 new_leafdata[i].rank = rank; 564 new_leafdata[i].index = i; 565 } 566 for (PetscInt i = 0; i < point_nleaf; i++) { 567 PetscInt j = point_local[i]; 568 new_leafdata[j] = point_remote[i]; 569 } 570 // REPLACE is okay because every leaf agrees about the new owners 571 PetscCall(PetscSFReduceBegin(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE)); 572 PetscCall(PetscSFReduceEnd(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE)); 573 // rootdata now contains the new owners 574 575 // Send to leaves of old space 576 for (PetscInt i = 0; i < old_npoints; i++) { 577 leafdata[i].rank = -1; 578 leafdata[i].index = -1; 579 } 580 PetscCall(PetscSFBcastBegin(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE)); 581 PetscCall(PetscSFBcastEnd(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE)); 582 583 // Send to new leaf space 584 PetscCall(PetscSFBcastBegin(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE)); 585 PetscCall(PetscSFBcastEnd(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE)); 586 587 PetscInt nface = 0, *new_local; 588 PetscSFNode *new_remote; 589 for (PetscInt i = 0; i < new_npoints; i++) nface += (new_leafdata[i].rank >= 0); 590 PetscCall(PetscMalloc1(nface, &new_local)); 591 PetscCall(PetscMalloc1(nface, &new_remote)); 592 nface = 0; 593 for (PetscInt i = 0; i < new_npoints; i++) { 594 if (new_leafdata[i].rank == -1) continue; 595 new_local[nface] = i; 596 new_remote[nface] = new_leafdata[i]; 597 nface++; 598 } 599 PetscCall(PetscFree3(rootdata, leafdata, new_leafdata)); 600 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &new_face_sfs[f])); 601 PetscCall(PetscSFSetGraph(new_face_sfs[f], new_npoints, nface, new_local, PETSC_OWN_POINTER, new_remote, PETSC_OWN_POINTER)); 602 { 603 char new_face_sf_name[PETSC_MAX_PATH_LEN]; 604 PetscCall(PetscSNPrintf(new_face_sf_name, sizeof new_face_sf_name, "Migrated Isoperiodic Faces #%" PetscInt_FMT, f)); 605 PetscCall(PetscObjectSetName((PetscObject)new_face_sfs[f], new_face_sf_name)); 606 } 607 } 608 609 PetscCall(DMPlexSetIsoperiodicFaceSF(dm, plex->periodic.num_face_sfs, new_face_sfs)); 610 PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, plex->periodic.num_face_sfs, (PetscScalar *)plex->periodic.transform)); 611 for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) PetscCall(PetscSFDestroy(&new_face_sfs[f])); 612 PetscCall(PetscFree(new_face_sfs)); 613 PetscFunctionReturn(PETSC_SUCCESS); 614 } 615 616 PetscErrorCode DMPeriodicCoordinateSetUp_Internal(DM dm) 617 { 618 DM_Plex *plex = (DM_Plex *)dm->data; 619 PetscCount count; 620 IS isdof; 621 PetscInt dim; 622 623 PetscFunctionBegin; 624 if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS); 625 PetscCall(DMGetIsoperiodicPointSF_Plex(dm, NULL)); 626 PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex)); 627 628 PetscCall(DMGetDimension(dm, &dim)); 629 dm->periodic.num_affines = plex->periodic.num_face_sfs; 630 PetscCall(PetscMalloc2(dm->periodic.num_affines, &dm->periodic.affine_to_local, dm->periodic.num_affines, &dm->periodic.affine)); 631 632 for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) { 633 PetscInt npoints, vsize, isize; 634 const PetscInt *points; 635 IS is = plex->periodic.periodic_points[f]; 636 PetscSegBuffer seg; 637 PetscSection section; 638 PetscInt *ind; 639 Vec L, P; 640 VecType vec_type; 641 VecScatter scatter; 642 PetscScalar *x; 643 644 PetscCall(DMGetLocalSection(dm, §ion)); 645 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 32, &seg)); 646 PetscCall(ISGetSize(is, &npoints)); 647 PetscCall(ISGetIndices(is, &points)); 648 for (PetscInt i = 0; i < npoints; i++) { 649 PetscInt point = points[i], off, dof; 650 651 PetscCall(PetscSectionGetOffset(section, point, &off)); 652 PetscCall(PetscSectionGetDof(section, point, &dof)); 653 PetscAssert(dof % dim == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected dof %" PetscInt_FMT " not divisible by dimension %" PetscInt_FMT, dof, dim); 654 for (PetscInt j = 0; j < dof / dim; j++) { 655 PetscInt *slot; 656 657 PetscCall(PetscSegBufferGetInts(seg, 1, &slot)); 658 *slot = off / dim + j; 659 } 660 } 661 PetscCall(PetscSegBufferGetSize(seg, &count)); 662 PetscCall(PetscSegBufferExtractAlloc(seg, &ind)); 663 PetscCall(PetscSegBufferDestroy(&seg)); 664 PetscCall(PetscIntCast(count, &isize)); 665 PetscCall(ISCreateBlock(PETSC_COMM_SELF, dim, isize, ind, PETSC_OWN_POINTER, &isdof)); 666 667 PetscCall(PetscIntCast(count * dim, &vsize)); 668 PetscCall(DMGetLocalVector(dm, &L)); 669 PetscCall(VecCreate(PETSC_COMM_SELF, &P)); 670 PetscCall(VecSetSizes(P, vsize, vsize)); 671 PetscCall(VecGetType(L, &vec_type)); 672 PetscCall(VecSetType(P, vec_type)); 673 PetscCall(VecScatterCreate(P, NULL, L, isdof, &scatter)); 674 PetscCall(DMRestoreLocalVector(dm, &L)); 675 PetscCall(ISDestroy(&isdof)); 676 677 PetscCall(VecGetArrayWrite(P, &x)); 678 for (PetscCount i = 0; i < count; i++) { 679 for (PetscInt j = 0; j < dim; j++) x[i * dim + j] = plex->periodic.transform[f][j][3]; 680 } 681 PetscCall(VecRestoreArrayWrite(P, &x)); 682 683 dm->periodic.affine_to_local[f] = scatter; 684 dm->periodic.affine[f] = P; 685 } 686 PetscCall(DMGlobalToLocalHookAdd(dm, NULL, DMCoordAddPeriodicOffsets_Private, NULL)); 687 PetscFunctionReturn(PETSC_SUCCESS); 688 } 689 690 // We'll just orient all the edges, though only periodic boundary edges need orientation 691 static PetscErrorCode DMPlexOrientPositiveEdges_Private(DM dm) 692 { 693 PetscInt dim, eStart, eEnd; 694 695 PetscFunctionBegin; 696 PetscCall(DMGetDimension(dm, &dim)); 697 if (dim < 3) PetscFunctionReturn(PETSC_SUCCESS); // not necessary 698 PetscCall(DMPlexGetDepthStratum(dm, 1, &eStart, &eEnd)); 699 for (PetscInt e = eStart; e < eEnd; e++) { 700 const PetscInt *cone; 701 PetscCall(DMPlexGetCone(dm, e, &cone)); 702 if (cone[0] > cone[1]) PetscCall(DMPlexOrientPoint(dm, e, -1)); 703 } 704 PetscFunctionReturn(PETSC_SUCCESS); 705 } 706 707 PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Internal(DM dm, PetscInt dim, const PetscInt faces[], const PetscReal lower[], const PetscReal upper[], const DMBoundaryType periodicity[], PetscBool interpolate) 708 { 709 PetscInt eextent[3] = {1, 1, 1}, vextent[3] = {1, 1, 1}; 710 const Ijk closure_1[] = { 711 {0, 0, 0}, 712 {1, 0, 0}, 713 }; 714 const Ijk closure_2[] = { 715 {0, 0, 0}, 716 {1, 0, 0}, 717 {1, 1, 0}, 718 {0, 1, 0}, 719 }; 720 const Ijk closure_3[] = { 721 {0, 0, 0}, 722 {0, 1, 0}, 723 {1, 1, 0}, 724 {1, 0, 0}, 725 {0, 0, 1}, 726 {1, 0, 1}, 727 {1, 1, 1}, 728 {0, 1, 1}, 729 }; 730 const Ijk *const closure_dim[] = {NULL, closure_1, closure_2, closure_3}; 731 // This must be kept consistent with DMPlexCreateCubeMesh_Internal 732 const PetscInt face_marker_1[] = {1, 2}; 733 const PetscInt face_marker_2[] = {4, 2, 1, 3}; 734 const PetscInt face_marker_3[] = {6, 5, 3, 4, 1, 2}; 735 const PetscInt *const face_marker_dim[] = {NULL, face_marker_1, face_marker_2, face_marker_3}; 736 // Orient faces so the normal is in the positive axis and the first vertex is the one closest to zero. 737 // These orientations can be determined by examining cones of a reference quad and hex element. 738 const PetscInt face_orient_1[] = {0, 0}; 739 const PetscInt face_orient_2[] = {-1, 0, 0, -1}; 740 const PetscInt face_orient_3[] = {-2, 0, -2, 1, -2, 0}; 741 const PetscInt *const face_orient_dim[] = {NULL, face_orient_1, face_orient_2, face_orient_3}; 742 743 PetscFunctionBegin; 744 PetscCall(PetscLogEventBegin(DMPLEX_CreateBoxSFC, dm, 0, 0, 0)); 745 PetscAssertPointer(dm, 1); 746 PetscValidLogicalCollectiveInt(dm, dim, 2); 747 PetscCall(DMSetDimension(dm, dim)); 748 PetscMPIInt rank, size; 749 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size)); 750 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank)); 751 for (PetscInt i = 0; i < dim; i++) { 752 eextent[i] = faces[i]; 753 vextent[i] = faces[i] + 1; 754 } 755 ZLayout layout; 756 PetscCall(ZLayoutCreate(size, eextent, vextent, &layout)); 757 PetscZSet vset; // set of all vertices in the closure of the owned elements 758 PetscCall(PetscZSetCreate(&vset)); 759 PetscInt local_elems = 0; 760 for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) { 761 Ijk loc = ZCodeSplit(z); 762 if (IjkActive(layout.vextent, loc)) PetscCall(PetscZSetAdd(vset, z)); 763 else { 764 z += ZStepOct(z); 765 continue; 766 } 767 if (IjkActive(layout.eextent, loc)) { 768 local_elems++; 769 // Add all neighboring vertices to set 770 for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) { 771 Ijk inc = closure_dim[dim][n]; 772 Ijk nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k}; 773 ZCode v = ZEncode(nloc); 774 PetscCall(PetscZSetAdd(vset, v)); 775 } 776 } 777 } 778 PetscInt local_verts, off = 0; 779 ZCode *vert_z; 780 PetscCall(PetscZSetGetSize(vset, &local_verts)); 781 PetscCall(PetscMalloc1(local_verts, &vert_z)); 782 PetscCall(PetscZSetGetElems(vset, &off, vert_z)); 783 PetscCall(PetscZSetDestroy(&vset)); 784 // ZCode is unsigned for bitwise convenience, but highest bit should never be set, so can interpret as signed 785 PetscCall(PetscSortInt64(local_verts, (PetscInt64 *)vert_z)); 786 787 PetscCall(DMPlexSetChart(dm, 0, local_elems + local_verts)); 788 for (PetscInt e = 0; e < local_elems; e++) PetscCall(DMPlexSetConeSize(dm, e, PetscPowInt(2, dim))); 789 PetscCall(DMSetUp(dm)); 790 { 791 PetscInt e = 0; 792 for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) { 793 Ijk loc = ZCodeSplit(z); 794 if (!IjkActive(layout.eextent, loc)) { 795 z += ZStepOct(z); 796 continue; 797 } 798 PetscInt cone[8], orient[8] = {0}; 799 for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) { 800 Ijk inc = closure_dim[dim][n]; 801 Ijk nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k}; 802 ZCode v = ZEncode(nloc); 803 PetscInt ci = ZCodeFind(v, local_verts, vert_z); 804 PetscAssert(ci >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Did not find neighbor vertex in set"); 805 cone[n] = local_elems + ci; 806 } 807 PetscCall(DMPlexSetCone(dm, e, cone)); 808 PetscCall(DMPlexSetConeOrientation(dm, e, orient)); 809 e++; 810 } 811 } 812 813 PetscCall(DMPlexSymmetrize(dm)); 814 PetscCall(DMPlexStratify(dm)); 815 816 { // Create point SF 817 PetscSF sf; 818 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &sf)); 819 PetscInt owned_verts = ZCodeFind(layout.zstarts[rank + 1], local_verts, vert_z); 820 if (owned_verts < 0) owned_verts = -(owned_verts + 1); // We don't care whether the key was found 821 PetscInt num_ghosts = local_verts - owned_verts; // Due to sorting, owned vertices always come first 822 PetscInt *local_ghosts; 823 PetscSFNode *ghosts; 824 PetscCall(PetscMalloc1(num_ghosts, &local_ghosts)); 825 PetscCall(PetscMalloc1(num_ghosts, &ghosts)); 826 for (PetscInt i = 0; i < num_ghosts;) { 827 ZCode z = vert_z[owned_verts + i]; 828 PetscMPIInt remote_rank, remote_count = 0; 829 830 PetscCall(PetscMPIIntCast(ZCodeFind(z, size + 1, layout.zstarts), &remote_rank)); 831 if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1; 832 // We have a new remote rank; find all the ghost indices (which are contiguous in vert_z) 833 834 // Count the elements on remote_rank 835 PetscInt remote_elem = ZLayoutElementsOnRank(&layout, remote_rank); 836 837 // Traverse vertices and make ghost links 838 for (ZCode rz = layout.zstarts[remote_rank]; rz < layout.zstarts[remote_rank + 1]; rz++) { 839 Ijk loc = ZCodeSplit(rz); 840 if (rz == z) { 841 local_ghosts[i] = local_elems + owned_verts + i; 842 ghosts[i].rank = remote_rank; 843 ghosts[i].index = remote_elem + remote_count; 844 i++; 845 if (i == num_ghosts) break; 846 z = vert_z[owned_verts + i]; 847 } 848 if (IjkActive(layout.vextent, loc)) remote_count++; 849 else rz += ZStepOct(rz); 850 } 851 } 852 PetscCall(PetscSFSetGraph(sf, local_elems + local_verts, num_ghosts, local_ghosts, PETSC_OWN_POINTER, ghosts, PETSC_OWN_POINTER)); 853 PetscCall(PetscObjectSetName((PetscObject)sf, "SFC Point SF")); 854 PetscCall(DMSetPointSF(dm, sf)); 855 PetscCall(PetscSFDestroy(&sf)); 856 } 857 { 858 Vec coordinates; 859 PetscScalar *coords; 860 PetscSection coord_section; 861 PetscInt coord_size; 862 PetscCall(DMGetCoordinateSection(dm, &coord_section)); 863 PetscCall(PetscSectionSetNumFields(coord_section, 1)); 864 PetscCall(PetscSectionSetFieldComponents(coord_section, 0, dim)); 865 PetscCall(PetscSectionSetChart(coord_section, local_elems, local_elems + local_verts)); 866 for (PetscInt v = 0; v < local_verts; v++) { 867 PetscInt point = local_elems + v; 868 PetscCall(PetscSectionSetDof(coord_section, point, dim)); 869 PetscCall(PetscSectionSetFieldDof(coord_section, point, 0, dim)); 870 } 871 PetscCall(PetscSectionSetUp(coord_section)); 872 PetscCall(PetscSectionGetStorageSize(coord_section, &coord_size)); 873 PetscCall(VecCreate(PETSC_COMM_SELF, &coordinates)); 874 PetscCall(PetscObjectSetName((PetscObject)coordinates, "coordinates")); 875 PetscCall(VecSetSizes(coordinates, coord_size, PETSC_DETERMINE)); 876 PetscCall(VecSetBlockSize(coordinates, dim)); 877 PetscCall(VecSetType(coordinates, VECSTANDARD)); 878 PetscCall(VecGetArray(coordinates, &coords)); 879 for (PetscInt v = 0; v < local_verts; v++) { 880 Ijk loc = ZCodeSplit(vert_z[v]); 881 coords[v * dim + 0] = lower[0] + loc.i * (upper[0] - lower[0]) / layout.eextent.i; 882 if (dim > 1) coords[v * dim + 1] = lower[1] + loc.j * (upper[1] - lower[1]) / layout.eextent.j; 883 if (dim > 2) coords[v * dim + 2] = lower[2] + loc.k * (upper[2] - lower[2]) / layout.eextent.k; 884 } 885 PetscCall(VecRestoreArray(coordinates, &coords)); 886 PetscCall(DMSetCoordinatesLocal(dm, coordinates)); 887 PetscCall(VecDestroy(&coordinates)); 888 } 889 if (interpolate) { 890 PetscCall(DMPlexInterpolateInPlace_Internal(dm)); 891 // It's currently necessary to orient the donor and periodic edges consistently. An easy way to ensure that is ot 892 // give all edges positive orientation. Since vertices are created in Z-order, all ranks will agree about the 893 // ordering cone[0] < cone[1]. This is overkill and it would be nice to remove this preparation and make 894 // DMPlexCreateIsoperiodicClosureSF_Private() more resilient, so it fixes any inconsistent orientations. That might 895 // be needed in a general CGNS reader, for example. 896 PetscCall(DMPlexOrientPositiveEdges_Private(dm)); 897 898 DMLabel label; 899 PetscCall(DMCreateLabel(dm, "Face Sets")); 900 PetscCall(DMGetLabel(dm, "Face Sets", &label)); 901 PetscSegBuffer per_faces[3], donor_face_closure[3], my_donor_faces[3]; 902 for (PetscInt i = 0; i < 3; i++) { 903 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &per_faces[i])); 904 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &my_donor_faces[i])); 905 PetscCall(PetscSegBufferCreate(sizeof(ZCode), 64 * PetscPowInt(2, dim), &donor_face_closure[i])); 906 } 907 PetscInt fStart, fEnd, vStart, vEnd; 908 PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd)); 909 PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd)); 910 for (PetscInt f = fStart; f < fEnd; f++) { 911 PetscInt npoints, *points = NULL, num_fverts = 0, fverts[8]; 912 PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points)); 913 PetscInt bc_count[6] = {0}; 914 for (PetscInt i = 0; i < npoints; i++) { 915 PetscInt p = points[2 * i]; 916 if (p < vStart || vEnd <= p) continue; 917 fverts[num_fverts++] = p; 918 Ijk loc = ZCodeSplit(vert_z[p - vStart]); 919 // Convention here matches DMPlexCreateCubeMesh_Internal 920 bc_count[0] += loc.i == 0; 921 bc_count[1] += loc.i == layout.vextent.i - 1; 922 bc_count[2] += loc.j == 0; 923 bc_count[3] += loc.j == layout.vextent.j - 1; 924 bc_count[4] += loc.k == 0; 925 bc_count[5] += loc.k == layout.vextent.k - 1; 926 } 927 PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points)); 928 for (PetscInt bc = 0, bc_match = 0; bc < 2 * dim; bc++) { 929 if (bc_count[bc] == PetscPowInt(2, dim - 1)) { 930 PetscCall(DMPlexOrientPoint(dm, f, face_orient_dim[dim][bc])); 931 if (periodicity[bc / 2] == DM_BOUNDARY_PERIODIC) { 932 PetscInt *put; 933 if (bc % 2 == 0) { // donor face; no label 934 PetscCall(PetscSegBufferGet(my_donor_faces[bc / 2], 1, &put)); 935 *put = f; 936 } else { // periodic face 937 PetscCall(PetscSegBufferGet(per_faces[bc / 2], 1, &put)); 938 *put = f; 939 ZCode *zput; 940 PetscCall(PetscSegBufferGet(donor_face_closure[bc / 2], num_fverts, &zput)); 941 for (PetscInt i = 0; i < num_fverts; i++) { 942 Ijk loc = ZCodeSplit(vert_z[fverts[i] - vStart]); 943 switch (bc / 2) { 944 case 0: 945 loc.i = 0; 946 break; 947 case 1: 948 loc.j = 0; 949 break; 950 case 2: 951 loc.k = 0; 952 break; 953 } 954 *zput++ = ZEncode(loc); 955 } 956 } 957 continue; 958 } 959 PetscAssert(bc_match == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Face matches multiple face sets"); 960 PetscCall(DMLabelSetValue(label, f, face_marker_dim[dim][bc])); 961 bc_match++; 962 } 963 } 964 } 965 // Ensure that the Coordinate DM has our new boundary labels 966 DM cdm; 967 PetscCall(DMGetCoordinateDM(dm, &cdm)); 968 PetscCall(DMCopyLabels(dm, cdm, PETSC_COPY_VALUES, PETSC_FALSE, DM_COPY_LABELS_FAIL)); 969 if (periodicity[0] == DM_BOUNDARY_PERIODIC || (dim > 1 && periodicity[1] == DM_BOUNDARY_PERIODIC) || (dim > 2 && periodicity[2] == DM_BOUNDARY_PERIODIC)) { 970 PetscCall(DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(dm, &layout, vert_z, per_faces, lower, upper, periodicity, donor_face_closure, my_donor_faces)); 971 } 972 for (PetscInt i = 0; i < 3; i++) { 973 PetscCall(PetscSegBufferDestroy(&per_faces[i])); 974 PetscCall(PetscSegBufferDestroy(&donor_face_closure[i])); 975 PetscCall(PetscSegBufferDestroy(&my_donor_faces[i])); 976 } 977 } 978 PetscCall(PetscFree(layout.zstarts)); 979 PetscCall(PetscFree(vert_z)); 980 PetscCall(PetscLogEventEnd(DMPLEX_CreateBoxSFC, dm, 0, 0, 0)); 981 PetscFunctionReturn(PETSC_SUCCESS); 982 } 983 984 /*@ 985 DMPlexSetIsoperiodicFaceSF - Express periodicity from an existing mesh 986 987 Logically Collective 988 989 Input Parameters: 990 + dm - The `DMPLEX` on which to set periodicity 991 . num_face_sfs - Number of `PetscSF`s in `face_sfs` 992 - face_sfs - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face. 993 994 Level: advanced 995 996 Note: 997 One can use `-dm_plex_shape zbox` to use this mode of periodicity, wherein the periodic points are distinct both globally 998 and locally, but are paired when creating a global dof space. 999 1000 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexGetIsoperiodicFaceSF()` 1001 @*/ 1002 PetscErrorCode DMPlexSetIsoperiodicFaceSF(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs) 1003 { 1004 DM_Plex *plex = (DM_Plex *)dm->data; 1005 1006 PetscFunctionBegin; 1007 PetscValidHeaderSpecific(dm, DM_CLASSID, 1); 1008 if (face_sfs) PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex)); 1009 if (face_sfs == plex->periodic.face_sfs && num_face_sfs == plex->periodic.num_face_sfs) PetscFunctionReturn(PETSC_SUCCESS); 1010 1011 for (PetscInt i = 0; i < num_face_sfs; i++) PetscCall(PetscObjectReference((PetscObject)face_sfs[i])); 1012 1013 if (plex->periodic.num_face_sfs > 0) { 1014 for (PetscInt i = 0; i < plex->periodic.num_face_sfs; i++) PetscCall(PetscSFDestroy(&plex->periodic.face_sfs[i])); 1015 PetscCall(PetscFree(plex->periodic.face_sfs)); 1016 } 1017 1018 plex->periodic.num_face_sfs = num_face_sfs; 1019 PetscCall(PetscCalloc1(num_face_sfs, &plex->periodic.face_sfs)); 1020 for (PetscInt i = 0; i < num_face_sfs; i++) plex->periodic.face_sfs[i] = face_sfs[i]; 1021 1022 DM cdm = dm->coordinates[0].dm; // Can't DMGetCoordinateDM because it automatically creates one 1023 if (cdm) { 1024 PetscCall(DMPlexSetIsoperiodicFaceSF(cdm, num_face_sfs, face_sfs)); 1025 if (face_sfs) cdm->periodic.setup = DMPeriodicCoordinateSetUp_Internal; 1026 } 1027 PetscFunctionReturn(PETSC_SUCCESS); 1028 } 1029 1030 /*@C 1031 DMPlexGetIsoperiodicFaceSF - Obtain periodicity for a mesh 1032 1033 Logically Collective 1034 1035 Input Parameter: 1036 . dm - The `DMPLEX` for which to obtain periodic relation 1037 1038 Output Parameters: 1039 + num_face_sfs - Number of `PetscSF`s in the array 1040 - face_sfs - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face. 1041 1042 Level: advanced 1043 1044 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()` 1045 @*/ 1046 PetscErrorCode DMPlexGetIsoperiodicFaceSF(DM dm, PetscInt *num_face_sfs, const PetscSF **face_sfs) 1047 { 1048 DM_Plex *plex = (DM_Plex *)dm->data; 1049 1050 PetscFunctionBegin; 1051 PetscValidHeaderSpecific(dm, DM_CLASSID, 1); 1052 *face_sfs = plex->periodic.face_sfs; 1053 *num_face_sfs = plex->periodic.num_face_sfs; 1054 PetscFunctionReturn(PETSC_SUCCESS); 1055 } 1056 1057 /*@C 1058 DMPlexSetIsoperiodicFaceTransform - set geometric transform from donor to periodic points 1059 1060 Logically Collective 1061 1062 Input Parameters: 1063 + dm - `DMPLEX` that has been configured with `DMPlexSetIsoperiodicFaceSF()` 1064 . n - Number of transforms in array 1065 - t - Array of 4x4 affine transformation basis. 1066 1067 Level: advanced 1068 1069 Notes: 1070 Affine transforms are 4x4 matrices in which the leading 3x3 block expresses a rotation (or identity for no rotation), 1071 the last column contains a translation vector, and the bottom row is all zero except the last entry, which must always 1072 be 1. This representation is common in geometric modeling and allows affine transformations to be composed using 1073 simple matrix multiplication. 1074 1075 Although the interface accepts a general affine transform, only affine translation is supported at present. 1076 1077 Developer Notes: 1078 This interface should be replaced by making BasisTransform public, expanding it to support affine representations, and 1079 adding GPU implementations to apply the G2L/L2G transforms. 1080 1081 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()` 1082 @*/ 1083 PetscErrorCode DMPlexSetIsoperiodicFaceTransform(DM dm, PetscInt n, const PetscScalar t[]) 1084 { 1085 DM_Plex *plex = (DM_Plex *)dm->data; 1086 1087 PetscFunctionBegin; 1088 PetscValidHeaderSpecific(dm, DM_CLASSID, 1); 1089 PetscCheck(n == plex->periodic.num_face_sfs, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Number of transforms (%" PetscInt_FMT ") must equal number of isoperiodc face SFs (%" PetscInt_FMT ")", n, plex->periodic.num_face_sfs); 1090 1091 PetscCall(PetscMalloc1(n, &plex->periodic.transform)); 1092 for (PetscInt i = 0; i < n; i++) { 1093 for (PetscInt j = 0; j < 4; j++) { 1094 for (PetscInt k = 0; k < 4; k++) { 1095 PetscCheck(j != k || t[i * 16 + j * 4 + k] == 1., PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Rotated transforms not supported"); 1096 plex->periodic.transform[i][j][k] = t[i * 16 + j * 4 + k]; 1097 } 1098 } 1099 } 1100 PetscFunctionReturn(PETSC_SUCCESS); 1101 } 1102