1 #include <petsc/private/dmpleximpl.h> /*I "petscdmplex.h" I*/ 2 #include <petscsf.h> 3 #include <petsc/private/hashset.h> 4 5 typedef uint64_t ZCode; 6 7 PETSC_HASH_SET(ZSet, ZCode, PetscHash_UInt64, PetscHashEqual) 8 9 typedef struct { 10 PetscInt i, j, k; 11 } Ijk; 12 13 typedef struct { 14 Ijk eextent; 15 Ijk vextent; 16 PetscMPIInt comm_size; 17 ZCode *zstarts; 18 } ZLayout; 19 20 // ***** Overview of ZCode ******* 21 // The SFC uses integer indexing for each dimension and encodes them into a single integer by interleaving the bits of each index. 22 // This is known as Morton encoding, and is refered to as ZCode in this code. 23 // So for index i having bits [i2,i1,i0], and similar for indexes j and k, the ZCode (Morton number) would be: 24 // [k2,j2,i2,k1,j1,i1,k0,j0,i0] 25 // This encoding allows for easier traversal of the SFC structure (see https://en.wikipedia.org/wiki/Z-order_curve and `ZStepOct()`). 26 // `ZEncode()` is used to go from indices to ZCode, while `ZCodeSplit()` goes from ZCode back to indices. 27 28 // Decodes the leading interleaved index from a ZCode 29 // e.g. [k2,j2,i2,k1,j1,i1,k0,j0,i0] -> [i2,i1,i0] 30 // Magic numbers taken from https://stackoverflow.com/a/18528775/7564988 (translated to octal) 31 static unsigned ZCodeSplit1(ZCode z) 32 { 33 z &= 0111111111111111111111; 34 z = (z | z >> 2) & 0103030303030303030303; 35 z = (z | z >> 4) & 0100170017001700170017; 36 z = (z | z >> 8) & 0370000037700000377; 37 z = (z | z >> 16) & 0370000000000177777; 38 z = (z | z >> 32) & 07777777; 39 return (unsigned)z; 40 } 41 42 // Encodes the leading interleaved index from a ZCode 43 // e.g. [i2,i1,i0] -> [0,0,i2,0,0,i1,0,0,i0] 44 static ZCode ZEncode1(unsigned t) 45 { 46 ZCode z = t; 47 z &= 07777777; 48 z = (z | z << 32) & 0370000000000177777; 49 z = (z | z << 16) & 0370000037700000377; 50 z = (z | z << 8) & 0100170017001700170017; 51 z = (z | z << 4) & 0103030303030303030303; 52 z = (z | z << 2) & 0111111111111111111111; 53 return z; 54 } 55 56 // Decodes i j k indices from a ZCode. 57 // Uses `ZCodeSplit1()` by shifting ZCode so that the leading index is the desired one to decode 58 static Ijk ZCodeSplit(ZCode z) 59 { 60 Ijk c; 61 c.i = ZCodeSplit1(z >> 2); 62 c.j = ZCodeSplit1(z >> 1); 63 c.k = ZCodeSplit1(z >> 0); 64 return c; 65 } 66 67 // Encodes i j k indices to a ZCode. 68 // Uses `ZCodeEncode1()` by shifting resulting ZCode to the appropriate bit placement 69 static ZCode ZEncode(Ijk c) 70 { 71 ZCode z = (ZEncode1((unsigned int)c.i) << 2) | (ZEncode1((unsigned int)c.j) << 1) | ZEncode1((unsigned int)c.k); 72 return z; 73 } 74 75 static PetscBool IjkActive(Ijk extent, Ijk l) 76 { 77 if (l.i < extent.i && l.j < extent.j && l.k < extent.k) return PETSC_TRUE; 78 return PETSC_FALSE; 79 } 80 81 // If z is not the base of an octet (last 3 bits 0), return 0. 82 // 83 // If z is the base of an octet, we recursively grow to the biggest structured octet. This is typically useful when a z 84 // is outside the domain and we wish to skip a (possibly recursively large) octet to find our next interesting point. 85 static ZCode ZStepOct(ZCode z) 86 { 87 if (PetscUnlikely(z == 0)) return 0; // Infinite loop below if z == 0 88 ZCode step = 07; 89 for (; (z & step) == 0; step = (step << 3) | 07) { } 90 return step >> 3; 91 } 92 93 // Since element/vertex box extents are typically not equal powers of 2, Z codes that lie within the domain are not contiguous. 94 static PetscErrorCode ZLayoutCreate(PetscMPIInt size, const PetscInt eextent[3], const PetscInt vextent[3], ZLayout *layout) 95 { 96 PetscFunctionBegin; 97 layout->eextent.i = eextent[0]; 98 layout->eextent.j = eextent[1]; 99 layout->eextent.k = eextent[2]; 100 layout->vextent.i = vextent[0]; 101 layout->vextent.j = vextent[1]; 102 layout->vextent.k = vextent[2]; 103 layout->comm_size = size; 104 layout->zstarts = NULL; 105 PetscCall(PetscMalloc1(size + 1, &layout->zstarts)); 106 107 PetscInt total_elems = eextent[0] * eextent[1] * eextent[2]; 108 ZCode z = 0; 109 layout->zstarts[0] = 0; 110 // This loop traverses all vertices in the global domain, so is worth making fast. We use ZStepBound 111 for (PetscMPIInt r = 0; r < size; r++) { 112 PetscInt elems_needed = (total_elems / size) + (total_elems % size > r), count; 113 for (count = 0; count < elems_needed; z++) { 114 ZCode skip = ZStepOct(z); // optimistically attempt a longer step 115 for (ZCode s = skip;; s >>= 3) { 116 Ijk trial = ZCodeSplit(z + s); 117 if (IjkActive(layout->eextent, trial)) { 118 while (count + s + 1 > (ZCode)elems_needed) s >>= 3; // Shrink the octet 119 count += s + 1; 120 z += s; 121 break; 122 } 123 if (s == 0) { // the whole skip octet is inactive 124 z += skip; 125 break; 126 } 127 } 128 } 129 // Pick up any extra vertices in the Z ordering before the next rank's first owned element. 130 // 131 // This leads to poorly balanced vertices when eextent is a power of 2, since all the fringe vertices end up 132 // on the last rank. A possible solution is to balance the Z-order vertices independently from the cells, which will 133 // result in a lot of element closures being remote. We could finish marking boundary conditions, then do a round of 134 // vertex ownership smoothing (which would reorder and redistribute vertices without touching element distribution). 135 // Another would be to have an analytic ownership criteria for vertices in the fringe veextent - eextent. This would 136 // complicate the job of identifying an owner and its offset. 137 // 138 // The current recommended approach is to let `-dm_distribute 1` (default) resolve vertex ownership. This is 139 // *mandatory* with isoperiodicity (except in special cases) to remove standed vertices from local spaces. Here's 140 // the issue: 141 // 142 // Consider this partition on rank 0 (left) and rank 1. 143 // 144 // 4 -------- 5 -- 14 --10 -- 21 --11 145 // | | | 146 // 7 -- 16 -- 8 | | | 147 // | | 3 ------- 7 ------- 9 148 // | | | | 149 // 4 -------- 6 ------ 10 | | 150 // | | | 6 -- 16 -- 8 151 // | | | 152 // 3 ---11--- 5 --18-- 9 153 // 154 // The periodic face SF looks like 155 // [0] Number of roots=21, leaves=1, remote ranks=1 156 // [0] 16 <- (0,11) 157 // [1] Number of roots=22, leaves=2, remote ranks=2 158 // [1] 14 <- (0,18) 159 // [1] 21 <- (1,16) 160 // 161 // In handling face (0,16), rank 0 learns that (0,7) and (0,8) map to (0,3) and (0,5) respectively, thus we won't use 162 // the point SF links to (1,4) and (1,5). Rank 1 learns about the periodic mapping of (1,5) while handling face 163 // (1,14), but never learns that vertex (1,4) has been mapped to (0,3) by face (0,16). 164 // 165 // We can relatively easily inform vertex (1,4) of this mapping, but it stays in rank 1's local space despite not 166 // being in the closure and thus not being contributed to. This would be mostly harmless except that some viewer 167 // routines expect all local points to be somehow significant. It is not easy to analytically remove the (1,4) 168 // vertex because the point SF and isoperiodic face SF would need to be updated to account for removal of the 169 // stranded vertices. 170 for (; z <= ZEncode(layout->vextent); z++) { 171 Ijk loc = ZCodeSplit(z); 172 if (IjkActive(layout->eextent, loc)) break; 173 z += ZStepOct(z); 174 } 175 layout->zstarts[r + 1] = z; 176 } 177 layout->zstarts[size] = ZEncode(layout->vextent); 178 PetscFunctionReturn(PETSC_SUCCESS); 179 } 180 181 static PetscInt ZLayoutElementsOnRank(const ZLayout *layout, PetscMPIInt rank) 182 { 183 PetscInt remote_elem = 0; 184 for (ZCode rz = layout->zstarts[rank]; rz < layout->zstarts[rank + 1]; rz++) { 185 Ijk loc = ZCodeSplit(rz); 186 if (IjkActive(layout->eextent, loc)) remote_elem++; 187 else rz += ZStepOct(rz); 188 } 189 return remote_elem; 190 } 191 192 static PetscInt ZCodeFind(ZCode key, PetscInt n, const ZCode X[]) 193 { 194 PetscInt lo = 0, hi = n; 195 196 if (n == 0) return -1; 197 while (hi - lo > 1) { 198 PetscInt mid = lo + (hi - lo) / 2; 199 if (key < X[mid]) hi = mid; 200 else lo = mid; 201 } 202 return key == X[lo] ? lo : -(lo + (key > X[lo]) + 1); 203 } 204 205 static inline PetscBool IsPointInsideStratum(PetscInt point, PetscInt pStart, PetscInt pEnd) 206 { 207 return (point >= pStart && point < pEnd) ? PETSC_TRUE : PETSC_FALSE; 208 } 209 210 static PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(DM dm, const ZLayout *layout, const ZCode *vert_z, PetscSegBuffer per_faces[3], const PetscReal *lower, const PetscReal *upper, const DMBoundaryType *periodicity, PetscSegBuffer donor_face_closure[3], PetscSegBuffer my_donor_faces[3]) 211 { 212 MPI_Comm comm; 213 PetscInt dim, vStart, vEnd; 214 PetscMPIInt size; 215 PetscSF face_sfs[3]; 216 PetscScalar transforms[3][4][4] = {{{0}}}; 217 218 PetscFunctionBegin; 219 PetscCall(PetscObjectGetComm((PetscObject)dm, &comm)); 220 PetscCallMPI(MPI_Comm_size(comm, &size)); 221 PetscCall(DMGetDimension(dm, &dim)); 222 const PetscInt csize = PetscPowInt(2, dim - 1); 223 PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd)); 224 225 PetscInt num_directions = 0; 226 for (PetscInt direction = 0; direction < dim; direction++) { 227 PetscCount num_faces; 228 PetscInt *faces; 229 ZCode *donor_verts, *donor_minz; 230 PetscSFNode *leaf; 231 PetscCount num_multiroots = 0; 232 PetscInt pStart, pEnd; 233 PetscBool sorted; 234 PetscInt inum_faces; 235 236 if (periodicity[direction] != DM_BOUNDARY_PERIODIC) continue; 237 PetscCall(PetscSegBufferGetSize(per_faces[direction], &num_faces)); 238 PetscCall(PetscSegBufferExtractInPlace(per_faces[direction], &faces)); 239 PetscCall(PetscSegBufferExtractInPlace(donor_face_closure[direction], &donor_verts)); 240 PetscCall(PetscMalloc1(num_faces, &donor_minz)); 241 PetscCall(PetscMalloc1(num_faces, &leaf)); 242 for (PetscCount i = 0; i < num_faces; i++) { 243 ZCode minz = donor_verts[i * csize]; 244 245 for (PetscInt j = 1; j < csize; j++) minz = PetscMin(minz, donor_verts[i * csize + j]); 246 donor_minz[i] = minz; 247 } 248 PetscCall(PetscIntCast(num_faces, &inum_faces)); 249 PetscCall(PetscSortedInt64(inum_faces, (const PetscInt64 *)donor_minz, &sorted)); 250 // If a donor vertex were chosen to broker multiple faces, we would have a logic error. 251 // Checking for sorting is a cheap check that there are no duplicates. 252 PetscCheck(sorted, PETSC_COMM_SELF, PETSC_ERR_PLIB, "minz not sorted; possible duplicates not checked"); 253 for (PetscCount i = 0; i < num_faces;) { 254 ZCode z = donor_minz[i]; 255 PetscMPIInt remote_rank, remote_count = 0; 256 257 PetscCall(PetscMPIIntCast(ZCodeFind(z, size + 1, layout->zstarts), &remote_rank)); 258 if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1; 259 // Process all the vertices on this rank 260 for (ZCode rz = layout->zstarts[remote_rank]; rz < layout->zstarts[remote_rank + 1]; rz++) { 261 Ijk loc = ZCodeSplit(rz); 262 263 if (rz == z) { 264 leaf[i].rank = remote_rank; 265 leaf[i].index = remote_count; 266 i++; 267 if (i == num_faces) break; 268 z = donor_minz[i]; 269 } 270 if (IjkActive(layout->vextent, loc)) remote_count++; 271 } 272 } 273 PetscCall(PetscFree(donor_minz)); 274 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &face_sfs[num_directions])); 275 PetscCall(PetscSFSetGraph(face_sfs[num_directions], vEnd - vStart, inum_faces, NULL, PETSC_USE_POINTER, leaf, PETSC_USE_POINTER)); 276 const PetscInt *my_donor_degree; 277 PetscCall(PetscSFComputeDegreeBegin(face_sfs[num_directions], &my_donor_degree)); 278 PetscCall(PetscSFComputeDegreeEnd(face_sfs[num_directions], &my_donor_degree)); 279 280 for (PetscInt i = 0; i < vEnd - vStart; i++) { 281 num_multiroots += my_donor_degree[i]; 282 if (my_donor_degree[i] == 0) continue; 283 PetscAssert(my_donor_degree[i] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex has multiple faces"); 284 } 285 PetscInt *my_donors, *donor_indices, *my_donor_indices; 286 PetscCount num_my_donors; 287 288 PetscCall(PetscSegBufferGetSize(my_donor_faces[direction], &num_my_donors)); 289 PetscCheck(num_my_donors == num_multiroots, PETSC_COMM_SELF, PETSC_ERR_SUP, "Donor request (%" PetscCount_FMT ") does not match expected donors (%" PetscCount_FMT ")", num_my_donors, num_multiroots); 290 PetscCall(PetscSegBufferExtractInPlace(my_donor_faces[direction], &my_donors)); 291 PetscCall(PetscMalloc1(vEnd - vStart, &my_donor_indices)); 292 for (PetscCount i = 0; i < num_my_donors; i++) { 293 PetscInt f = my_donors[i]; 294 PetscInt num_points, *points = NULL, minv = PETSC_INT_MAX; 295 296 PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points)); 297 for (PetscInt j = 0; j < num_points; j++) { 298 PetscInt p = points[2 * j]; 299 if (!IsPointInsideStratum(p, vStart, vEnd)) continue; 300 minv = PetscMin(minv, p); 301 } 302 PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points)); 303 PetscAssert(my_donor_degree[minv - vStart] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex not requested"); 304 my_donor_indices[minv - vStart] = f; 305 } 306 PetscCall(PetscMalloc1(num_faces, &donor_indices)); 307 PetscCall(PetscSFBcastBegin(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE)); 308 PetscCall(PetscSFBcastEnd(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE)); 309 PetscCall(PetscFree(my_donor_indices)); 310 // Modify our leafs so they point to donor faces instead of donor minz. Additionally, give them indices as faces. 311 for (PetscCount i = 0; i < num_faces; i++) leaf[i].index = donor_indices[i]; 312 PetscCall(PetscFree(donor_indices)); 313 PetscCall(DMPlexGetChart(dm, &pStart, &pEnd)); 314 PetscCall(PetscSFSetGraph(face_sfs[num_directions], pEnd - pStart, inum_faces, faces, PETSC_COPY_VALUES, leaf, PETSC_OWN_POINTER)); 315 { 316 char face_sf_name[PETSC_MAX_PATH_LEN]; 317 PetscCall(PetscSNPrintf(face_sf_name, sizeof face_sf_name, "Z-order Isoperiodic Faces #%" PetscInt_FMT, num_directions)); 318 PetscCall(PetscObjectSetName((PetscObject)face_sfs[num_directions], face_sf_name)); 319 } 320 321 transforms[num_directions][0][0] = 1; 322 transforms[num_directions][1][1] = 1; 323 transforms[num_directions][2][2] = 1; 324 transforms[num_directions][3][3] = 1; 325 transforms[num_directions][direction][3] = upper[direction] - lower[direction]; 326 num_directions++; 327 } 328 329 PetscCall(DMPlexSetIsoperiodicFaceSF(dm, num_directions, face_sfs)); 330 PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, num_directions, (PetscScalar *)transforms)); 331 332 for (PetscInt i = 0; i < num_directions; i++) PetscCall(PetscSFDestroy(&face_sfs[i])); 333 PetscFunctionReturn(PETSC_SUCCESS); 334 } 335 336 // This is a DMGlobalToLocalHook that applies the affine offsets. When extended for rotated periodicity, it'll need to 337 // apply a rotatonal transform and similar operations will be needed for fields (e.g., to rotate a velocity vector). 338 // We use this crude approach here so we don't have to write new GPU kernels yet. 339 static PetscErrorCode DMCoordAddPeriodicOffsets_Private(DM dm, Vec g, InsertMode mode, Vec l, void *ctx) 340 { 341 PetscFunctionBegin; 342 // These `VecScatter`s should be merged to improve efficiency; the scatters cannot be overlapped. 343 for (PetscInt i = 0; i < dm->periodic.num_affines; i++) { 344 PetscCall(VecScatterBegin(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD)); 345 PetscCall(VecScatterEnd(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD)); 346 } 347 PetscFunctionReturn(PETSC_SUCCESS); 348 } 349 350 // Creates SF to communicate data from donor to periodic faces. The data can be different sizes per donor-periodic pair and is given in `point_sizes[]` 351 PetscErrorCode CreateDonorToPeriodicSF(DM dm, PetscSF face_sf, PetscInt pStart, PetscInt pEnd, const PetscInt point_sizes[], PetscInt *rootbuffersize, PetscInt *leafbuffersize, PetscBT *rootbt, PetscSF *sf_closure) 352 { 353 MPI_Comm comm; 354 PetscMPIInt rank; 355 PetscInt nroots, nleaves; 356 PetscInt *rootdata, *leafdata; 357 const PetscInt *filocal; 358 const PetscSFNode *firemote; 359 360 PetscFunctionBeginUser; 361 PetscCall(PetscObjectGetComm((PetscObject)dm, &comm)); 362 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 363 364 PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, &firemote)); 365 PetscCall(PetscCalloc2(2 * nroots, &rootdata, 2 * nroots, &leafdata)); 366 for (PetscInt i = 0; i < nleaves; i++) { 367 PetscInt point = filocal[i]; 368 PetscCheck(IsPointInsideStratum(point, pStart, pEnd), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Point %" PetscInt_FMT " in leaves exists outside of stratum [%" PetscInt_FMT ", %" PetscInt_FMT ")", point, pStart, pEnd); 369 leafdata[point] = point_sizes[point - pStart]; 370 } 371 PetscCall(PetscSFReduceBegin(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM)); 372 PetscCall(PetscSFReduceEnd(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM)); 373 374 PetscInt root_offset = 0; 375 PetscCall(PetscBTCreate(nroots, rootbt)); 376 for (PetscInt p = 0; p < nroots; p++) { 377 const PetscInt *donor_dof = rootdata + nroots; 378 if (donor_dof[p] == 0) { 379 rootdata[2 * p] = -1; 380 rootdata[2 * p + 1] = -1; 381 continue; 382 } 383 PetscCall(PetscBTSet(*rootbt, p)); 384 PetscCheck(IsPointInsideStratum(p, pStart, pEnd), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Point %" PetscInt_FMT " in roots exists outside of stratum [%" PetscInt_FMT ", %" PetscInt_FMT ")", p, pStart, pEnd); 385 PetscInt p_size = point_sizes[p - pStart]; 386 PetscCheck(donor_dof[p] == p_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Reduced leaf data size (%" PetscInt_FMT ") does not match root data size (%" PetscInt_FMT ")", donor_dof[p], p_size); 387 rootdata[2 * p] = root_offset; 388 rootdata[2 * p + 1] = p_size; 389 root_offset += p_size; 390 } 391 PetscCall(PetscSFBcastBegin(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE)); 392 PetscCall(PetscSFBcastEnd(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE)); 393 // Count how many leaves we need to communicate the closures 394 PetscInt leaf_offset = 0; 395 for (PetscInt i = 0; i < nleaves; i++) { 396 PetscInt point = filocal[i]; 397 if (leafdata[2 * point + 1] < 0) continue; 398 leaf_offset += leafdata[2 * point + 1]; 399 } 400 401 PetscSFNode *closure_leaf; 402 PetscCall(PetscMalloc1(leaf_offset, &closure_leaf)); 403 leaf_offset = 0; 404 for (PetscInt i = 0; i < nleaves; i++) { 405 PetscInt point = filocal[i]; 406 PetscInt cl_size = leafdata[2 * point + 1]; 407 if (cl_size < 0) continue; 408 for (PetscInt j = 0; j < cl_size; j++) { 409 closure_leaf[leaf_offset].rank = firemote[i].rank; 410 closure_leaf[leaf_offset].index = leafdata[2 * point] + j; 411 leaf_offset++; 412 } 413 } 414 415 PetscCall(PetscSFCreate(comm, sf_closure)); 416 PetscCall(PetscSFSetGraph(*sf_closure, root_offset, leaf_offset, NULL, PETSC_USE_POINTER, closure_leaf, PETSC_OWN_POINTER)); 417 *rootbuffersize = root_offset; 418 *leafbuffersize = leaf_offset; 419 PetscCall(PetscFree2(rootdata, leafdata)); 420 PetscFunctionReturn(PETSC_SUCCESS); 421 } 422 423 // Start with an SF for a positive depth (e.g., faces) and create a new SF for matched closure. The caller must ensure 424 // that both the donor (root) face and the periodic (leaf) face have consistent orientation, meaning that their closures 425 // are isomorphic. It may be useful/necessary for this restriction to be loosened. 426 // 427 // Output Arguments: 428 // 429 // + closure_sf - augmented point SF (see `DMGetPointSF()`) that includes the faces and all points in its closure. This 430 // can be used to create a global section and section SF. 431 // - is_points - array of index sets for just the points in the closure of `face_sf`. These may be used to apply an affine 432 // transformation to periodic dofs; see DMPeriodicCoordinateSetUp_Internal(). 433 // 434 static PetscErrorCode DMPlexCreateIsoperiodicPointSF_Private(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs, PetscSF *closure_sf, IS **is_points) 435 { 436 MPI_Comm comm; 437 PetscMPIInt rank; 438 PetscSF point_sf; 439 PetscInt nroots, nleaves; 440 const PetscInt *filocal; 441 const PetscSFNode *firemote; 442 443 PetscFunctionBegin; 444 PetscCall(PetscObjectGetComm((PetscObject)dm, &comm)); 445 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 446 PetscCall(DMGetPointSF(dm, &point_sf)); // Point SF has remote points 447 PetscCall(PetscMalloc1(num_face_sfs, is_points)); 448 449 for (PetscInt f = 0; f < num_face_sfs; f++) { 450 PetscSF face_sf = face_sfs[f]; 451 PetscInt *cl_sizes; 452 PetscInt fStart, fEnd, rootbuffersize, leafbuffersize; 453 PetscSF sf_closure; 454 PetscBT rootbt; 455 456 PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd)); 457 PetscCall(PetscMalloc1(fEnd - fStart, &cl_sizes)); 458 for (PetscInt f = fStart, index = 0; f < fEnd; f++, index++) { 459 PetscInt cl_size, *closure = NULL; 460 PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &cl_size, &closure)); 461 cl_sizes[index] = cl_size - 1; 462 PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &cl_size, &closure)); 463 } 464 465 PetscCall(CreateDonorToPeriodicSF(dm, face_sf, fStart, fEnd, cl_sizes, &rootbuffersize, &leafbuffersize, &rootbt, &sf_closure)); 466 PetscCall(PetscFree(cl_sizes)); 467 PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, &firemote)); 468 469 PetscSFNode *leaf_donor_closure; 470 { // Pack root buffer with owner for every point in the root cones 471 PetscSFNode *donor_closure; 472 const PetscInt *pilocal; 473 const PetscSFNode *piremote; 474 PetscInt npoints; 475 476 PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, &pilocal, &piremote)); 477 PetscCall(PetscCalloc1(rootbuffersize, &donor_closure)); 478 for (PetscInt p = 0, root_offset = 0; p < nroots; p++) { 479 if (!PetscBTLookup(rootbt, p)) continue; 480 PetscInt cl_size, *closure = NULL; 481 PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure)); 482 for (PetscInt j = 1; j < cl_size; j++) { 483 PetscInt c = closure[2 * j]; 484 if (pilocal) { 485 PetscInt found = -1; 486 if (npoints > 0) PetscCall(PetscFindInt(c, npoints, pilocal, &found)); 487 if (found >= 0) { 488 donor_closure[root_offset++] = piremote[found]; 489 continue; 490 } 491 } 492 // we own c 493 donor_closure[root_offset].rank = rank; 494 donor_closure[root_offset].index = c; 495 root_offset++; 496 } 497 PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure)); 498 } 499 500 PetscCall(PetscMalloc1(leafbuffersize, &leaf_donor_closure)); 501 PetscCall(PetscSFBcastBegin(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE)); 502 PetscCall(PetscSFBcastEnd(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE)); 503 PetscCall(PetscSFDestroy(&sf_closure)); 504 PetscCall(PetscFree(donor_closure)); 505 } 506 507 PetscSFNode *new_iremote; 508 PetscCall(PetscCalloc1(nroots, &new_iremote)); 509 for (PetscInt i = 0; i < nroots; i++) new_iremote[i].rank = -1; 510 // Walk leaves and match vertices 511 for (PetscInt i = 0, leaf_offset = 0; i < nleaves; i++) { 512 PetscInt point = filocal[i], cl_size; 513 PetscInt *closure = NULL; 514 PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure)); 515 for (PetscInt j = 1; j < cl_size; j++) { // TODO: should we send donor edge orientations so we can flip for consistency? 516 PetscInt c = closure[2 * j]; 517 PetscSFNode lc = leaf_donor_closure[leaf_offset]; 518 // printf("[%d] face %d.%d: %d ?-- (%d,%d)\n", rank, point, j, c, lc.rank, lc.index); 519 if (new_iremote[c].rank == -1) { 520 new_iremote[c] = lc; 521 } else PetscCheck(new_iremote[c].rank == lc.rank && new_iremote[c].index == lc.index, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Mismatched cone ordering between faces"); 522 leaf_offset++; 523 } 524 PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure)); 525 } 526 PetscCall(PetscFree(leaf_donor_closure)); 527 528 // Include face points in closure SF 529 for (PetscInt i = 0; i < nleaves; i++) new_iremote[filocal[i]] = firemote[i]; 530 // consolidate leaves 531 PetscInt *leafdata; 532 PetscCall(PetscMalloc1(nroots, &leafdata)); 533 PetscInt num_new_leaves = 0; 534 for (PetscInt i = 0; i < nroots; i++) { 535 if (new_iremote[i].rank == -1) continue; 536 new_iremote[num_new_leaves] = new_iremote[i]; 537 leafdata[num_new_leaves] = i; 538 num_new_leaves++; 539 } 540 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, num_new_leaves, leafdata, PETSC_COPY_VALUES, &(*is_points)[f])); 541 542 PetscSF csf; 543 PetscCall(PetscSFCreate(comm, &csf)); 544 PetscCall(PetscSFSetGraph(csf, nroots, num_new_leaves, leafdata, PETSC_COPY_VALUES, new_iremote, PETSC_COPY_VALUES)); 545 PetscCall(PetscFree(new_iremote)); // copy and delete because new_iremote is longer than it needs to be 546 PetscCall(PetscFree(leafdata)); 547 PetscCall(PetscBTDestroy(&rootbt)); 548 549 PetscInt npoints; 550 PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, NULL, NULL)); 551 if (npoints < 0) { // empty point_sf 552 *closure_sf = csf; 553 } else { 554 PetscCall(PetscSFMerge(point_sf, csf, closure_sf)); 555 PetscCall(PetscSFDestroy(&csf)); 556 } 557 if (f > 0) PetscCall(PetscSFDestroy(&point_sf)); // Only destroy if point_sf is from previous calls to PetscSFMerge 558 point_sf = *closure_sf; // Use combined point + isoperiodic SF to define point ownership for further face_sf 559 } 560 PetscCall(PetscObjectSetName((PetscObject)*closure_sf, "Composed Periodic Points")); 561 PetscFunctionReturn(PETSC_SUCCESS); 562 } 563 564 static PetscErrorCode DMGetIsoperiodicPointSF_Plex(DM dm, PetscSF *sf) 565 { 566 DM_Plex *plex = (DM_Plex *)dm->data; 567 568 PetscFunctionBegin; 569 if (!plex->periodic.composed_sf) PetscCall(DMPlexCreateIsoperiodicPointSF_Private(dm, plex->periodic.num_face_sfs, plex->periodic.face_sfs, &plex->periodic.composed_sf, &plex->periodic.periodic_points)); 570 if (sf) *sf = plex->periodic.composed_sf; 571 PetscFunctionReturn(PETSC_SUCCESS); 572 } 573 574 PetscErrorCode DMPlexMigrateIsoperiodicFaceSF_Internal(DM old_dm, DM dm, PetscSF sf_migration) 575 { 576 DM_Plex *plex = (DM_Plex *)old_dm->data; 577 PetscSF sf_point, *new_face_sfs; 578 PetscMPIInt rank; 579 580 PetscFunctionBegin; 581 if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS); 582 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank)); 583 PetscCall(DMGetPointSF(dm, &sf_point)); 584 PetscCall(PetscMalloc1(plex->periodic.num_face_sfs, &new_face_sfs)); 585 586 for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) { 587 PetscInt old_npoints, new_npoints, old_nleaf, new_nleaf, point_nleaf; 588 PetscSFNode *new_leafdata, *rootdata, *leafdata; 589 const PetscInt *old_local, *point_local; 590 const PetscSFNode *old_remote, *point_remote; 591 592 PetscCall(PetscSFGetGraph(plex->periodic.face_sfs[f], &old_npoints, &old_nleaf, &old_local, &old_remote)); 593 PetscCall(PetscSFGetGraph(sf_migration, NULL, &new_nleaf, NULL, NULL)); 594 PetscCall(PetscSFGetGraph(sf_point, &new_npoints, &point_nleaf, &point_local, &point_remote)); 595 PetscAssert(new_nleaf == new_npoints, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected migration leaf space to match new point root space"); 596 PetscCall(PetscMalloc3(old_npoints, &rootdata, old_npoints, &leafdata, new_npoints, &new_leafdata)); 597 598 // Fill new_leafdata with new owners of all points 599 for (PetscInt i = 0; i < new_npoints; i++) { 600 new_leafdata[i].rank = rank; 601 new_leafdata[i].index = i; 602 } 603 for (PetscInt i = 0; i < point_nleaf; i++) { 604 PetscInt j = point_local[i]; 605 new_leafdata[j] = point_remote[i]; 606 } 607 // REPLACE is okay because every leaf agrees about the new owners 608 PetscCall(PetscSFReduceBegin(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE)); 609 PetscCall(PetscSFReduceEnd(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE)); 610 // rootdata now contains the new owners 611 612 // Send to leaves of old space 613 for (PetscInt i = 0; i < old_npoints; i++) { 614 leafdata[i].rank = -1; 615 leafdata[i].index = -1; 616 } 617 PetscCall(PetscSFBcastBegin(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE)); 618 PetscCall(PetscSFBcastEnd(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE)); 619 620 // Send to new leaf space 621 PetscCall(PetscSFBcastBegin(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE)); 622 PetscCall(PetscSFBcastEnd(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE)); 623 624 PetscInt nface = 0, *new_local; 625 PetscSFNode *new_remote; 626 for (PetscInt i = 0; i < new_npoints; i++) nface += (new_leafdata[i].rank >= 0); 627 PetscCall(PetscMalloc1(nface, &new_local)); 628 PetscCall(PetscMalloc1(nface, &new_remote)); 629 nface = 0; 630 for (PetscInt i = 0; i < new_npoints; i++) { 631 if (new_leafdata[i].rank == -1) continue; 632 new_local[nface] = i; 633 new_remote[nface] = new_leafdata[i]; 634 nface++; 635 } 636 PetscCall(PetscFree3(rootdata, leafdata, new_leafdata)); 637 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &new_face_sfs[f])); 638 PetscCall(PetscSFSetGraph(new_face_sfs[f], new_npoints, nface, new_local, PETSC_OWN_POINTER, new_remote, PETSC_OWN_POINTER)); 639 { 640 char new_face_sf_name[PETSC_MAX_PATH_LEN]; 641 PetscCall(PetscSNPrintf(new_face_sf_name, sizeof new_face_sf_name, "Migrated Isoperiodic Faces #%" PetscInt_FMT, f)); 642 PetscCall(PetscObjectSetName((PetscObject)new_face_sfs[f], new_face_sf_name)); 643 } 644 } 645 646 PetscCall(DMPlexSetIsoperiodicFaceSF(dm, plex->periodic.num_face_sfs, new_face_sfs)); 647 PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, plex->periodic.num_face_sfs, (PetscScalar *)plex->periodic.transform)); 648 for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) PetscCall(PetscSFDestroy(&new_face_sfs[f])); 649 PetscCall(PetscFree(new_face_sfs)); 650 PetscFunctionReturn(PETSC_SUCCESS); 651 } 652 653 PetscErrorCode DMPeriodicCoordinateSetUp_Internal(DM dm) 654 { 655 DM_Plex *plex = (DM_Plex *)dm->data; 656 PetscCount count; 657 IS isdof; 658 PetscInt dim; 659 660 PetscFunctionBegin; 661 if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS); 662 PetscCall(DMGetIsoperiodicPointSF_Plex(dm, NULL)); 663 PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex)); 664 665 PetscCall(DMGetDimension(dm, &dim)); 666 dm->periodic.num_affines = plex->periodic.num_face_sfs; 667 PetscCall(PetscMalloc2(dm->periodic.num_affines, &dm->periodic.affine_to_local, dm->periodic.num_affines, &dm->periodic.affine)); 668 669 for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) { 670 PetscInt npoints, vsize, isize; 671 const PetscInt *points; 672 IS is = plex->periodic.periodic_points[f]; 673 PetscSegBuffer seg; 674 PetscSection section; 675 PetscInt *ind; 676 Vec L, P; 677 VecType vec_type; 678 VecScatter scatter; 679 PetscScalar *x; 680 681 PetscCall(DMGetLocalSection(dm, §ion)); 682 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 32, &seg)); 683 PetscCall(ISGetSize(is, &npoints)); 684 PetscCall(ISGetIndices(is, &points)); 685 for (PetscInt i = 0; i < npoints; i++) { 686 PetscInt point = points[i], off, dof; 687 688 PetscCall(PetscSectionGetOffset(section, point, &off)); 689 PetscCall(PetscSectionGetDof(section, point, &dof)); 690 PetscAssert(dof % dim == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected dof %" PetscInt_FMT " not divisible by dimension %" PetscInt_FMT, dof, dim); 691 for (PetscInt j = 0; j < dof / dim; j++) { 692 PetscInt *slot; 693 694 PetscCall(PetscSegBufferGetInts(seg, 1, &slot)); 695 *slot = off / dim + j; 696 } 697 } 698 PetscCall(PetscSegBufferGetSize(seg, &count)); 699 PetscCall(PetscSegBufferExtractAlloc(seg, &ind)); 700 PetscCall(PetscSegBufferDestroy(&seg)); 701 PetscCall(PetscIntCast(count, &isize)); 702 PetscCall(ISCreateBlock(PETSC_COMM_SELF, dim, isize, ind, PETSC_OWN_POINTER, &isdof)); 703 704 PetscCall(PetscIntCast(count * dim, &vsize)); 705 PetscCall(DMGetLocalVector(dm, &L)); 706 PetscCall(VecCreate(PETSC_COMM_SELF, &P)); 707 PetscCall(VecSetSizes(P, vsize, vsize)); 708 PetscCall(VecGetType(L, &vec_type)); 709 PetscCall(VecSetType(P, vec_type)); 710 PetscCall(VecScatterCreate(P, NULL, L, isdof, &scatter)); 711 PetscCall(DMRestoreLocalVector(dm, &L)); 712 PetscCall(ISDestroy(&isdof)); 713 714 PetscCall(VecGetArrayWrite(P, &x)); 715 for (PetscCount i = 0; i < count; i++) { 716 for (PetscInt j = 0; j < dim; j++) x[i * dim + j] = plex->periodic.transform[f][j][3]; 717 } 718 PetscCall(VecRestoreArrayWrite(P, &x)); 719 720 dm->periodic.affine_to_local[f] = scatter; 721 dm->periodic.affine[f] = P; 722 } 723 PetscCall(DMGlobalToLocalHookAdd(dm, NULL, DMCoordAddPeriodicOffsets_Private, NULL)); 724 PetscFunctionReturn(PETSC_SUCCESS); 725 } 726 727 // We'll just orient all the edges, though only periodic boundary edges need orientation 728 static PetscErrorCode DMPlexOrientPositiveEdges_Private(DM dm) 729 { 730 PetscInt dim, eStart, eEnd; 731 732 PetscFunctionBegin; 733 PetscCall(DMGetDimension(dm, &dim)); 734 if (dim < 3) PetscFunctionReturn(PETSC_SUCCESS); // not necessary 735 PetscCall(DMPlexGetDepthStratum(dm, 1, &eStart, &eEnd)); 736 for (PetscInt e = eStart; e < eEnd; e++) { 737 const PetscInt *cone; 738 PetscCall(DMPlexGetCone(dm, e, &cone)); 739 if (cone[0] > cone[1]) PetscCall(DMPlexOrientPoint(dm, e, -1)); 740 } 741 PetscFunctionReturn(PETSC_SUCCESS); 742 } 743 744 PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Internal(DM dm, PetscInt dim, const PetscInt faces[], const PetscReal lower[], const PetscReal upper[], const DMBoundaryType periodicity[], PetscBool interpolate) 745 { 746 PetscInt eextent[3] = {1, 1, 1}, vextent[3] = {1, 1, 1}; 747 const Ijk closure_1[] = { 748 {0, 0, 0}, 749 {1, 0, 0}, 750 }; 751 const Ijk closure_2[] = { 752 {0, 0, 0}, 753 {1, 0, 0}, 754 {1, 1, 0}, 755 {0, 1, 0}, 756 }; 757 const Ijk closure_3[] = { 758 {0, 0, 0}, 759 {0, 1, 0}, 760 {1, 1, 0}, 761 {1, 0, 0}, 762 {0, 0, 1}, 763 {1, 0, 1}, 764 {1, 1, 1}, 765 {0, 1, 1}, 766 }; 767 const Ijk *const closure_dim[] = {NULL, closure_1, closure_2, closure_3}; 768 // This must be kept consistent with DMPlexCreateCubeMesh_Internal 769 const PetscInt face_marker_1[] = {1, 2}; 770 const PetscInt face_marker_2[] = {4, 2, 1, 3}; 771 const PetscInt face_marker_3[] = {6, 5, 3, 4, 1, 2}; 772 const PetscInt *const face_marker_dim[] = {NULL, face_marker_1, face_marker_2, face_marker_3}; 773 // Orient faces so the normal is in the positive axis and the first vertex is the one closest to zero. 774 // These orientations can be determined by examining cones of a reference quad and hex element. 775 const PetscInt face_orient_1[] = {0, 0}; 776 const PetscInt face_orient_2[] = {-1, 0, 0, -1}; 777 const PetscInt face_orient_3[] = {-2, 0, -2, 1, -2, 0}; 778 const PetscInt *const face_orient_dim[] = {NULL, face_orient_1, face_orient_2, face_orient_3}; 779 780 PetscFunctionBegin; 781 PetscCall(PetscLogEventBegin(DMPLEX_CreateBoxSFC, dm, 0, 0, 0)); 782 PetscAssertPointer(dm, 1); 783 PetscValidLogicalCollectiveInt(dm, dim, 2); 784 PetscCall(DMSetDimension(dm, dim)); 785 PetscMPIInt rank, size; 786 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size)); 787 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank)); 788 for (PetscInt i = 0; i < dim; i++) { 789 eextent[i] = faces[i]; 790 vextent[i] = faces[i] + 1; 791 } 792 ZLayout layout; 793 PetscCall(ZLayoutCreate(size, eextent, vextent, &layout)); 794 PetscZSet vset; // set of all vertices in the closure of the owned elements 795 PetscCall(PetscZSetCreate(&vset)); 796 PetscInt local_elems = 0; 797 for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) { 798 Ijk loc = ZCodeSplit(z); 799 if (IjkActive(layout.vextent, loc)) PetscCall(PetscZSetAdd(vset, z)); 800 else { 801 z += ZStepOct(z); 802 continue; 803 } 804 if (IjkActive(layout.eextent, loc)) { 805 local_elems++; 806 // Add all neighboring vertices to set 807 for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) { 808 Ijk inc = closure_dim[dim][n]; 809 Ijk nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k}; 810 ZCode v = ZEncode(nloc); 811 PetscCall(PetscZSetAdd(vset, v)); 812 } 813 } 814 } 815 PetscInt local_verts, off = 0; 816 ZCode *vert_z; 817 PetscCall(PetscZSetGetSize(vset, &local_verts)); 818 PetscCall(PetscMalloc1(local_verts, &vert_z)); 819 PetscCall(PetscZSetGetElems(vset, &off, vert_z)); 820 PetscCall(PetscZSetDestroy(&vset)); 821 // ZCode is unsigned for bitwise convenience, but highest bit should never be set, so can interpret as signed 822 PetscCall(PetscSortInt64(local_verts, (PetscInt64 *)vert_z)); 823 824 PetscCall(DMPlexSetChart(dm, 0, local_elems + local_verts)); 825 for (PetscInt e = 0; e < local_elems; e++) PetscCall(DMPlexSetConeSize(dm, e, PetscPowInt(2, dim))); 826 PetscCall(DMSetUp(dm)); 827 { 828 PetscInt e = 0; 829 for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) { 830 Ijk loc = ZCodeSplit(z); 831 if (!IjkActive(layout.eextent, loc)) { 832 z += ZStepOct(z); 833 continue; 834 } 835 PetscInt cone[8], orient[8] = {0}; 836 for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) { 837 Ijk inc = closure_dim[dim][n]; 838 Ijk nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k}; 839 ZCode v = ZEncode(nloc); 840 PetscInt ci = ZCodeFind(v, local_verts, vert_z); 841 PetscAssert(ci >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Did not find neighbor vertex in set"); 842 cone[n] = local_elems + ci; 843 } 844 PetscCall(DMPlexSetCone(dm, e, cone)); 845 PetscCall(DMPlexSetConeOrientation(dm, e, orient)); 846 e++; 847 } 848 } 849 850 PetscCall(DMPlexSymmetrize(dm)); 851 PetscCall(DMPlexStratify(dm)); 852 853 { // Create point SF 854 PetscSF sf; 855 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &sf)); 856 PetscInt owned_verts = ZCodeFind(layout.zstarts[rank + 1], local_verts, vert_z); 857 if (owned_verts < 0) owned_verts = -(owned_verts + 1); // We don't care whether the key was found 858 PetscInt num_ghosts = local_verts - owned_verts; // Due to sorting, owned vertices always come first 859 PetscInt *local_ghosts; 860 PetscSFNode *ghosts; 861 PetscCall(PetscMalloc1(num_ghosts, &local_ghosts)); 862 PetscCall(PetscMalloc1(num_ghosts, &ghosts)); 863 for (PetscInt i = 0; i < num_ghosts;) { 864 ZCode z = vert_z[owned_verts + i]; 865 PetscMPIInt remote_rank, remote_count = 0; 866 867 PetscCall(PetscMPIIntCast(ZCodeFind(z, size + 1, layout.zstarts), &remote_rank)); 868 if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1; 869 // We have a new remote rank; find all the ghost indices (which are contiguous in vert_z) 870 871 // Count the elements on remote_rank 872 PetscInt remote_elem = ZLayoutElementsOnRank(&layout, remote_rank); 873 874 // Traverse vertices and make ghost links 875 for (ZCode rz = layout.zstarts[remote_rank]; rz < layout.zstarts[remote_rank + 1]; rz++) { 876 Ijk loc = ZCodeSplit(rz); 877 if (rz == z) { 878 local_ghosts[i] = local_elems + owned_verts + i; 879 ghosts[i].rank = remote_rank; 880 ghosts[i].index = remote_elem + remote_count; 881 i++; 882 if (i == num_ghosts) break; 883 z = vert_z[owned_verts + i]; 884 } 885 if (IjkActive(layout.vextent, loc)) remote_count++; 886 else rz += ZStepOct(rz); 887 } 888 } 889 PetscCall(PetscSFSetGraph(sf, local_elems + local_verts, num_ghosts, local_ghosts, PETSC_OWN_POINTER, ghosts, PETSC_OWN_POINTER)); 890 PetscCall(PetscObjectSetName((PetscObject)sf, "SFC Point SF")); 891 PetscCall(DMSetPointSF(dm, sf)); 892 PetscCall(PetscSFDestroy(&sf)); 893 } 894 { 895 Vec coordinates; 896 PetscScalar *coords; 897 PetscSection coord_section; 898 PetscInt coord_size; 899 PetscCall(DMGetCoordinateSection(dm, &coord_section)); 900 PetscCall(PetscSectionSetNumFields(coord_section, 1)); 901 PetscCall(PetscSectionSetFieldComponents(coord_section, 0, dim)); 902 PetscCall(PetscSectionSetChart(coord_section, local_elems, local_elems + local_verts)); 903 for (PetscInt v = 0; v < local_verts; v++) { 904 PetscInt point = local_elems + v; 905 PetscCall(PetscSectionSetDof(coord_section, point, dim)); 906 PetscCall(PetscSectionSetFieldDof(coord_section, point, 0, dim)); 907 } 908 PetscCall(PetscSectionSetUp(coord_section)); 909 PetscCall(PetscSectionGetStorageSize(coord_section, &coord_size)); 910 PetscCall(VecCreate(PETSC_COMM_SELF, &coordinates)); 911 PetscCall(PetscObjectSetName((PetscObject)coordinates, "coordinates")); 912 PetscCall(VecSetSizes(coordinates, coord_size, PETSC_DETERMINE)); 913 PetscCall(VecSetBlockSize(coordinates, dim)); 914 PetscCall(VecSetType(coordinates, VECSTANDARD)); 915 PetscCall(VecGetArray(coordinates, &coords)); 916 for (PetscInt v = 0; v < local_verts; v++) { 917 Ijk loc = ZCodeSplit(vert_z[v]); 918 coords[v * dim + 0] = lower[0] + loc.i * (upper[0] - lower[0]) / layout.eextent.i; 919 if (dim > 1) coords[v * dim + 1] = lower[1] + loc.j * (upper[1] - lower[1]) / layout.eextent.j; 920 if (dim > 2) coords[v * dim + 2] = lower[2] + loc.k * (upper[2] - lower[2]) / layout.eextent.k; 921 } 922 PetscCall(VecRestoreArray(coordinates, &coords)); 923 PetscCall(DMSetCoordinatesLocal(dm, coordinates)); 924 PetscCall(VecDestroy(&coordinates)); 925 } 926 if (interpolate) { 927 PetscCall(DMPlexInterpolateInPlace_Internal(dm)); 928 // It's currently necessary to orient the donor and periodic edges consistently. An easy way to ensure that is ot 929 // give all edges positive orientation. Since vertices are created in Z-order, all ranks will agree about the 930 // ordering cone[0] < cone[1]. This is overkill and it would be nice to remove this preparation and make 931 // DMPlexCreateIsoperiodicClosureSF_Private() more resilient, so it fixes any inconsistent orientations. That might 932 // be needed in a general CGNS reader, for example. 933 PetscCall(DMPlexOrientPositiveEdges_Private(dm)); 934 935 DMLabel label; 936 PetscCall(DMCreateLabel(dm, "Face Sets")); 937 PetscCall(DMGetLabel(dm, "Face Sets", &label)); 938 PetscSegBuffer per_faces[3], donor_face_closure[3], my_donor_faces[3]; 939 for (PetscInt i = 0; i < 3; i++) { 940 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &per_faces[i])); 941 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &my_donor_faces[i])); 942 PetscCall(PetscSegBufferCreate(sizeof(ZCode), 64 * PetscPowInt(2, dim), &donor_face_closure[i])); 943 } 944 PetscInt fStart, fEnd, vStart, vEnd; 945 PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd)); 946 PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd)); 947 for (PetscInt f = fStart; f < fEnd; f++) { 948 PetscInt npoints, *points = NULL, num_fverts = 0, fverts[8]; 949 PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points)); 950 PetscInt bc_count[6] = {0}; 951 for (PetscInt i = 0; i < npoints; i++) { 952 PetscInt p = points[2 * i]; 953 if (!IsPointInsideStratum(p, vStart, vEnd)) continue; 954 fverts[num_fverts++] = p; 955 Ijk loc = ZCodeSplit(vert_z[p - vStart]); 956 // Convention here matches DMPlexCreateCubeMesh_Internal 957 bc_count[0] += loc.i == 0; 958 bc_count[1] += loc.i == layout.vextent.i - 1; 959 bc_count[2] += loc.j == 0; 960 bc_count[3] += loc.j == layout.vextent.j - 1; 961 bc_count[4] += loc.k == 0; 962 bc_count[5] += loc.k == layout.vextent.k - 1; 963 } 964 PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points)); 965 for (PetscInt bc = 0, bc_match = 0; bc < 2 * dim; bc++) { 966 if (bc_count[bc] == PetscPowInt(2, dim - 1)) { 967 PetscCall(DMPlexOrientPoint(dm, f, face_orient_dim[dim][bc])); 968 if (periodicity[bc / 2] == DM_BOUNDARY_PERIODIC) { 969 PetscInt *put; 970 if (bc % 2 == 0) { // donor face; no label 971 PetscCall(PetscSegBufferGet(my_donor_faces[bc / 2], 1, &put)); 972 *put = f; 973 } else { // periodic face 974 PetscCall(PetscSegBufferGet(per_faces[bc / 2], 1, &put)); 975 *put = f; 976 ZCode *zput; 977 PetscCall(PetscSegBufferGet(donor_face_closure[bc / 2], num_fverts, &zput)); 978 for (PetscInt i = 0; i < num_fverts; i++) { 979 Ijk loc = ZCodeSplit(vert_z[fverts[i] - vStart]); 980 switch (bc / 2) { 981 case 0: 982 loc.i = 0; 983 break; 984 case 1: 985 loc.j = 0; 986 break; 987 case 2: 988 loc.k = 0; 989 break; 990 } 991 *zput++ = ZEncode(loc); 992 } 993 } 994 continue; 995 } 996 PetscAssert(bc_match == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Face matches multiple face sets"); 997 PetscCall(DMLabelSetValue(label, f, face_marker_dim[dim][bc])); 998 bc_match++; 999 } 1000 } 1001 } 1002 // Ensure that the Coordinate DM has our new boundary labels 1003 DM cdm; 1004 PetscCall(DMGetCoordinateDM(dm, &cdm)); 1005 PetscCall(DMCopyLabels(dm, cdm, PETSC_COPY_VALUES, PETSC_FALSE, DM_COPY_LABELS_FAIL)); 1006 if (periodicity[0] == DM_BOUNDARY_PERIODIC || (dim > 1 && periodicity[1] == DM_BOUNDARY_PERIODIC) || (dim > 2 && periodicity[2] == DM_BOUNDARY_PERIODIC)) { 1007 PetscCall(DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(dm, &layout, vert_z, per_faces, lower, upper, periodicity, donor_face_closure, my_donor_faces)); 1008 } 1009 for (PetscInt i = 0; i < 3; i++) { 1010 PetscCall(PetscSegBufferDestroy(&per_faces[i])); 1011 PetscCall(PetscSegBufferDestroy(&donor_face_closure[i])); 1012 PetscCall(PetscSegBufferDestroy(&my_donor_faces[i])); 1013 } 1014 } 1015 PetscCall(PetscFree(layout.zstarts)); 1016 PetscCall(PetscFree(vert_z)); 1017 PetscCall(PetscLogEventEnd(DMPLEX_CreateBoxSFC, dm, 0, 0, 0)); 1018 PetscFunctionReturn(PETSC_SUCCESS); 1019 } 1020 1021 /*@ 1022 DMPlexSetIsoperiodicFaceSF - Express periodicity from an existing mesh 1023 1024 Logically Collective 1025 1026 Input Parameters: 1027 + dm - The `DMPLEX` on which to set periodicity 1028 . num_face_sfs - Number of `PetscSF`s in `face_sfs` 1029 - face_sfs - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face. 1030 1031 Level: advanced 1032 1033 Note: 1034 One can use `-dm_plex_shape zbox` to use this mode of periodicity, wherein the periodic points are distinct both globally 1035 and locally, but are paired when creating a global dof space. 1036 1037 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexGetIsoperiodicFaceSF()` 1038 @*/ 1039 PetscErrorCode DMPlexSetIsoperiodicFaceSF(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs) 1040 { 1041 DM_Plex *plex = (DM_Plex *)dm->data; 1042 1043 PetscFunctionBegin; 1044 PetscValidHeaderSpecific(dm, DM_CLASSID, 1); 1045 if (num_face_sfs) PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex)); 1046 else PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", NULL)); 1047 if (num_face_sfs == plex->periodic.num_face_sfs && (num_face_sfs == 0 || face_sfs == plex->periodic.face_sfs)) PetscFunctionReturn(PETSC_SUCCESS); 1048 PetscCall(DMSetGlobalSection(dm, NULL)); 1049 1050 for (PetscInt i = 0; i < num_face_sfs; i++) PetscCall(PetscObjectReference((PetscObject)face_sfs[i])); 1051 1052 if (plex->periodic.num_face_sfs > 0) { 1053 for (PetscInt i = 0; i < plex->periodic.num_face_sfs; i++) PetscCall(PetscSFDestroy(&plex->periodic.face_sfs[i])); 1054 PetscCall(PetscFree(plex->periodic.face_sfs)); 1055 } 1056 1057 plex->periodic.num_face_sfs = num_face_sfs; 1058 PetscCall(PetscCalloc1(num_face_sfs, &plex->periodic.face_sfs)); 1059 for (PetscInt i = 0; i < num_face_sfs; i++) plex->periodic.face_sfs[i] = face_sfs[i]; 1060 1061 DM cdm = dm->coordinates[0].dm; // Can't DMGetCoordinateDM because it automatically creates one 1062 if (cdm) { 1063 PetscCall(DMPlexSetIsoperiodicFaceSF(cdm, num_face_sfs, face_sfs)); 1064 if (face_sfs) cdm->periodic.setup = DMPeriodicCoordinateSetUp_Internal; 1065 } 1066 PetscFunctionReturn(PETSC_SUCCESS); 1067 } 1068 1069 /*@C 1070 DMPlexGetIsoperiodicFaceSF - Obtain periodicity for a mesh 1071 1072 Logically Collective 1073 1074 Input Parameter: 1075 . dm - The `DMPLEX` for which to obtain periodic relation 1076 1077 Output Parameters: 1078 + num_face_sfs - Number of `PetscSF`s in the array 1079 - face_sfs - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face. 1080 1081 Level: advanced 1082 1083 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()` 1084 @*/ 1085 PetscErrorCode DMPlexGetIsoperiodicFaceSF(DM dm, PetscInt *num_face_sfs, const PetscSF **face_sfs) 1086 { 1087 DM_Plex *plex = (DM_Plex *)dm->data; 1088 1089 PetscFunctionBegin; 1090 PetscValidHeaderSpecific(dm, DM_CLASSID, 1); 1091 *face_sfs = plex->periodic.face_sfs; 1092 *num_face_sfs = plex->periodic.num_face_sfs; 1093 PetscFunctionReturn(PETSC_SUCCESS); 1094 } 1095 1096 /*@C 1097 DMPlexSetIsoperiodicFaceTransform - set geometric transform from donor to periodic points 1098 1099 Logically Collective 1100 1101 Input Parameters: 1102 + dm - `DMPLEX` that has been configured with `DMPlexSetIsoperiodicFaceSF()` 1103 . n - Number of transforms in array 1104 - t - Array of 4x4 affine transformation basis. 1105 1106 Level: advanced 1107 1108 Notes: 1109 Affine transforms are 4x4 matrices in which the leading 3x3 block expresses a rotation (or identity for no rotation), 1110 the last column contains a translation vector, and the bottom row is all zero except the last entry, which must always 1111 be 1. This representation is common in geometric modeling and allows affine transformations to be composed using 1112 simple matrix multiplication. 1113 1114 Although the interface accepts a general affine transform, only affine translation is supported at present. 1115 1116 Developer Notes: 1117 This interface should be replaced by making BasisTransform public, expanding it to support affine representations, and 1118 adding GPU implementations to apply the G2L/L2G transforms. 1119 1120 .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()` 1121 @*/ 1122 PetscErrorCode DMPlexSetIsoperiodicFaceTransform(DM dm, PetscInt n, const PetscScalar t[]) 1123 { 1124 DM_Plex *plex = (DM_Plex *)dm->data; 1125 1126 PetscFunctionBegin; 1127 PetscValidHeaderSpecific(dm, DM_CLASSID, 1); 1128 PetscCheck(n == plex->periodic.num_face_sfs, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Number of transforms (%" PetscInt_FMT ") must equal number of isoperiodc face SFs (%" PetscInt_FMT ")", n, plex->periodic.num_face_sfs); 1129 1130 PetscCall(PetscFree(plex->periodic.transform)); 1131 PetscCall(PetscMalloc1(n, &plex->periodic.transform)); 1132 for (PetscInt i = 0; i < n; i++) { 1133 for (PetscInt j = 0; j < 4; j++) { 1134 for (PetscInt k = 0; k < 4; k++) { 1135 PetscCheck(j != k || t[i * 16 + j * 4 + k] == 1., PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Rotated transforms not supported"); 1136 plex->periodic.transform[i][j][k] = t[i * 16 + j * 4 + k]; 1137 } 1138 } 1139 } 1140 PetscFunctionReturn(PETSC_SUCCESS); 1141 } 1142