1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/hashseti.h> 3 #include <petsc/private/viewerimpl.h> 4 #include <petsc/private/hashmapi.h> 5 6 #if defined(PETSC_HAVE_CUDA) 7 #include <petscdevice_cuda.h> 8 #endif 9 10 #if defined(PETSC_HAVE_HIP) 11 #include <hip/hip_runtime.h> 12 #endif 13 14 #if defined(PETSC_CLANG_STATIC_ANALYZER) 15 extern void PetscSFCheckGraphSet(PetscSF, int); 16 #else 17 #if defined(PETSC_USE_DEBUG) 18 #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME) 19 #else 20 #define PetscSFCheckGraphSet(sf, arg) \ 21 do { \ 22 } while (0) 23 #endif 24 #endif 25 26 const char *const PetscSFDuplicateOptions[] = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL}; 27 const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL}; 28 29 /*@ 30 PetscSFCreate - create a star forest communication context 31 32 Collective 33 34 Input Parameter: 35 . comm - communicator on which the star forest will operate 36 37 Output Parameter: 38 . sf - new star forest context 39 40 Options Database Key: 41 + -sf_type basic - Use MPI persistent Isend/Irecv for communication (Default) 42 . -sf_type window - Use MPI-3 one-sided window for communication 43 . -sf_type neighbor - Use MPI-3 neighborhood collectives for communication 44 - -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor) 45 46 Level: intermediate 47 48 Note: 49 When one knows the communication graph is one of the predefined graph, such as `MPI_Alltoall()`, `MPI_Allgatherv()`, 50 `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. 
These special 51 `PetscSF`s are optimized and have better performance than the general `PetscSF`s. 52 53 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()` 54 @*/ 55 PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf) 56 { 57 PetscSF b; 58 59 PetscFunctionBegin; 60 PetscAssertPointer(sf, 2); 61 PetscCall(PetscSFInitializePackage()); 62 63 PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView)); 64 b->nroots = -1; 65 b->nleaves = -1; 66 b->minleaf = PETSC_INT_MAX; 67 b->maxleaf = PETSC_INT_MIN; 68 b->nranks = -1; 69 b->rankorder = PETSC_TRUE; 70 b->ingroup = MPI_GROUP_NULL; 71 b->outgroup = MPI_GROUP_NULL; 72 b->graphset = PETSC_FALSE; 73 #if defined(PETSC_HAVE_DEVICE) 74 b->use_gpu_aware_mpi = use_gpu_aware_mpi; 75 b->use_stream_aware_mpi = PETSC_FALSE; 76 b->unknown_input_stream = PETSC_FALSE; 77 #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 78 b->backend = PETSCSF_BACKEND_KOKKOS; 79 #elif defined(PETSC_HAVE_CUDA) 80 b->backend = PETSCSF_BACKEND_CUDA; 81 #elif defined(PETSC_HAVE_HIP) 82 b->backend = PETSCSF_BACKEND_HIP; 83 #endif 84 85 #if defined(PETSC_HAVE_NVSHMEM) 86 b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */ 87 b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */ 88 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL)); 89 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL)); 90 #endif 91 #endif 92 b->vscat.from_n = -1; 93 b->vscat.to_n = -1; 94 b->vscat.unit = MPIU_SCALAR; 95 *sf = b; 96 PetscFunctionReturn(PETSC_SUCCESS); 97 } 98 99 /*@ 100 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 101 102 Collective 103 104 Input Parameter: 105 . 
sf - star forest 106 107 Level: advanced 108 109 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()` 110 @*/ 111 PetscErrorCode PetscSFReset(PetscSF sf) 112 { 113 PetscFunctionBegin; 114 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 115 PetscTryTypeMethod(sf, Reset); 116 PetscCall(PetscSFDestroy(&sf->rankssf)); 117 118 sf->nroots = -1; 119 sf->nleaves = -1; 120 sf->minleaf = PETSC_INT_MAX; 121 sf->maxleaf = PETSC_INT_MIN; 122 sf->mine = NULL; 123 sf->remote = NULL; 124 sf->graphset = PETSC_FALSE; 125 PetscCall(PetscFree(sf->mine_alloc)); 126 PetscCall(PetscFree(sf->remote_alloc)); 127 sf->nranks = -1; 128 PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote)); 129 sf->degreeknown = PETSC_FALSE; 130 PetscCall(PetscFree(sf->degree)); 131 if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup)); 132 if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup)); 133 134 if (sf->multi) sf->multi->multi = NULL; 135 PetscCall(PetscSFDestroy(&sf->multi)); 136 137 PetscCall(PetscLayoutDestroy(&sf->map)); 138 139 #if defined(PETSC_HAVE_DEVICE) 140 for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i])); 141 #endif 142 143 sf->setupcalled = PETSC_FALSE; 144 PetscFunctionReturn(PETSC_SUCCESS); 145 } 146 147 /*@ 148 PetscSFSetType - Set the `PetscSF` communication implementation 149 150 Collective 151 152 Input Parameters: 153 + sf - the `PetscSF` context 154 - type - a known method 155 .vb 156 PETSCSFWINDOW - MPI-2/3 one-sided 157 PETSCSFBASIC - basic implementation using MPI-1 two-sided 158 .ve 159 160 Options Database Key: 161 . 
-sf_type <type> - Sets the method; for example `basic` or `window` use -help for a list of available methods 162 163 Level: intermediate 164 165 Notes: 166 See `PetscSFType` for possible values 167 168 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()` 169 @*/ 170 PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type) 171 { 172 PetscBool match; 173 PetscErrorCode (*r)(PetscSF); 174 175 PetscFunctionBegin; 176 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 177 PetscAssertPointer(type, 2); 178 179 PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match)); 180 if (match) PetscFunctionReturn(PETSC_SUCCESS); 181 182 PetscCall(PetscFunctionListFind(PetscSFList, type, &r)); 183 PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type); 184 /* Destroy the previous PetscSF implementation context */ 185 PetscTryTypeMethod(sf, Destroy); 186 PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops))); 187 PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type)); 188 PetscCall((*r)(sf)); 189 PetscFunctionReturn(PETSC_SUCCESS); 190 } 191 192 /*@ 193 PetscSFGetType - Get the `PetscSF` communication implementation 194 195 Not Collective 196 197 Input Parameter: 198 . sf - the `PetscSF` context 199 200 Output Parameter: 201 . type - the `PetscSF` type name 202 203 Level: intermediate 204 205 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()` 206 @*/ 207 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 208 { 209 PetscFunctionBegin; 210 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 211 PetscAssertPointer(type, 2); 212 *type = ((PetscObject)sf)->type_name; 213 PetscFunctionReturn(PETSC_SUCCESS); 214 } 215 216 /*@ 217 PetscSFDestroy - destroy a star forest 218 219 Collective 220 221 Input Parameter: 222 . 
sf - address of star forest 223 224 Level: intermediate 225 226 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()` 227 @*/ 228 PetscErrorCode PetscSFDestroy(PetscSF *sf) 229 { 230 PetscFunctionBegin; 231 if (!*sf) PetscFunctionReturn(PETSC_SUCCESS); 232 PetscValidHeaderSpecific(*sf, PETSCSF_CLASSID, 1); 233 if (--((PetscObject)*sf)->refct > 0) { 234 *sf = NULL; 235 PetscFunctionReturn(PETSC_SUCCESS); 236 } 237 PetscCall(PetscSFReset(*sf)); 238 PetscTryTypeMethod(*sf, Destroy); 239 PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf)); 240 if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit)); 241 #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM) 242 if ((*sf)->use_stream_aware_mpi) { 243 PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream)); 244 PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm)); 245 } 246 #endif 247 PetscCall(PetscHeaderDestroy(sf)); 248 PetscFunctionReturn(PETSC_SUCCESS); 249 } 250 251 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 252 { 253 PetscInt i, nleaves; 254 PetscMPIInt size; 255 const PetscInt *ilocal; 256 const PetscSFNode *iremote; 257 258 PetscFunctionBegin; 259 if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS); 260 PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote)); 261 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 262 for (i = 0; i < nleaves; i++) { 263 const PetscInt rank = iremote[i].rank; 264 const PetscInt remote = iremote[i].index; 265 const PetscInt leaf = ilocal ? 
ilocal[i] : i; 266 PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size); 267 PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i); 268 PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i); 269 } 270 PetscFunctionReturn(PETSC_SUCCESS); 271 } 272 273 /*@ 274 PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication 275 276 Collective 277 278 Input Parameter: 279 . sf - star forest communication object 280 281 Level: beginner 282 283 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()` 284 @*/ 285 PetscErrorCode PetscSFSetUp(PetscSF sf) 286 { 287 PetscFunctionBegin; 288 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 289 PetscSFCheckGraphSet(sf, 1); 290 if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS); 291 PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0)); 292 PetscCall(PetscSFCheckGraphValid_Private(sf)); 293 if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */ 294 PetscTryTypeMethod(sf, SetUp); 295 #if defined(PETSC_HAVE_CUDA) 296 if (sf->backend == PETSCSF_BACKEND_CUDA) { 297 sf->ops->Malloc = PetscSFMalloc_CUDA; 298 sf->ops->Free = PetscSFFree_CUDA; 299 } 300 #endif 301 #if defined(PETSC_HAVE_HIP) 302 if (sf->backend == PETSCSF_BACKEND_HIP) { 303 sf->ops->Malloc = PetscSFMalloc_HIP; 304 sf->ops->Free = PetscSFFree_HIP; 305 } 306 #endif 307 308 #if defined(PETSC_HAVE_KOKKOS) 309 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 310 sf->ops->Malloc = PetscSFMalloc_Kokkos; 311 sf->ops->Free = PetscSFFree_Kokkos; 312 } 313 #endif 
314 PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0)); 315 sf->setupcalled = PETSC_TRUE; 316 PetscFunctionReturn(PETSC_SUCCESS); 317 } 318 319 /*@ 320 PetscSFSetFromOptions - set `PetscSF` options using the options database 321 322 Logically Collective 323 324 Input Parameter: 325 . sf - star forest 326 327 Options Database Keys: 328 + -sf_type - implementation type, see `PetscSFSetType()` 329 . -sf_rank_order - sort composite points for gathers and scatters in MPI rank order, gathers are non-deterministic otherwise 330 . -sf_use_default_stream - Assume callers of `PetscSF` computed the input root/leafdata with the default CUDA stream. `PetscSF` will also 331 use the default stream to process data. Therefore, no stream synchronization is needed between `PetscSF` and its caller (default: true). 332 If true, this option only works with `-use_gpu_aware_mpi 1`. 333 . -sf_use_stream_aware_mpi - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false). 334 If true, this option only works with `-use_gpu_aware_mpi 1`. 335 336 - -sf_backend <cuda,hip,kokkos> - Select the device backend`PetscSF` uses. Currently `PetscSF` has these backends: cuda - hip and Kokkos. 337 On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices, 338 the only available is kokkos. 339 340 Level: intermediate 341 342 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()` 343 @*/ 344 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 345 { 346 PetscSFType deft; 347 char type[256]; 348 PetscBool flg; 349 350 PetscFunctionBegin; 351 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 352 PetscObjectOptionsBegin((PetscObject)sf); 353 deft = ((PetscObject)sf)->type_name ? 
((PetscObject)sf)->type_name : PETSCSFBASIC; 354 PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg)); 355 PetscCall(PetscSFSetType(sf, flg ? type : deft)); 356 PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL)); 357 PetscCall(PetscOptionsBool("-sf_monitor", "monitor the MPI communication in sf", NULL, sf->monitor, &sf->monitor, NULL)); 358 #if defined(PETSC_HAVE_DEVICE) 359 { 360 char backendstr[32] = {0}; 361 PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set; 362 /* Change the defaults set in PetscSFCreate() with command line options */ 363 PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL)); 364 PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL)); 365 PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set)); 366 PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda)); 367 PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos)); 368 PetscCall(PetscStrcasecmp("hip", backendstr, &isHip)); 369 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 370 if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 371 else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 372 else if (isHip) sf->backend = PETSCSF_BACKEND_HIP; 373 else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. 
You may choose cuda, hip or kokkos (if installed)", backendstr); 374 #elif defined(PETSC_HAVE_KOKKOS) 375 PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr); 376 #endif 377 378 #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM) 379 if (sf->use_stream_aware_mpi) { 380 MPI_Info info; 381 382 PetscCallMPI(MPI_Info_create(&info)); 383 PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t")); 384 PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream))); 385 PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream)); 386 PetscCallMPI(MPI_Info_free(&info)); 387 PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm)); 388 } 389 #endif 390 } 391 #endif 392 PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject); 393 PetscOptionsEnd(); 394 PetscFunctionReturn(PETSC_SUCCESS); 395 } 396 397 /*@ 398 PetscSFSetRankOrder - sort multi-points for gathers and scatters by MPI rank order 399 400 Logically Collective 401 402 Input Parameters: 403 + sf - star forest 404 - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (false has a lower setup cost, but is non-deterministic) 405 406 Level: advanced 407 408 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()` 409 @*/ 410 PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg) 411 { 412 PetscFunctionBegin; 413 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 414 PetscValidLogicalCollectiveBool(sf, flg, 2); 415 PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 416 sf->rankorder = flg; 417 PetscFunctionReturn(PETSC_SUCCESS); 418 } 419 420 /*@ 421 PetscSFSetGraph - Set a parallel star forest 422 423 Collective 424 425 Input Parameters: 426 + sf - star 
forest 427 . nroots - number of root vertices on the current MPI process (these are possible targets for other process to attach leaves) 428 . nleaves - number of leaf vertices on the current MPI process, each of these references a root on any process 429 . ilocal - locations of leaves in leafdata buffers (locations must be >= 0, enforced during setup in debug mode), pass `NULL` for contiguous storage (same as passing (0, 1, 2, ..., nleaves-1)) 430 . localmode - copy mode for `ilocal` 431 . iremote - remote locations of root vertices for each leaf on the current process, length is `nleaves' (locations must be >= 0, enforced during setup in debug mode) 432 - remotemode - copy mode for `iremote` 433 434 Level: intermediate 435 436 Notes: 437 Leaf indices in `ilocal` must be unique, otherwise an error occurs. 438 439 Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics. 440 In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`, 441 PETSc might modify the respective array; 442 if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`. 443 If `PETSC_COPY_VALUES` is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed). 444 445 `PetscSFSetGraphWithPattern()` is an alternative approach to provide certain communication patterns that have extra optimizations. 446 447 Fortran Notes: 448 In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`. 449 450 Developer Notes: 451 We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf. 452 This also allows to compare leaf sets of two `PetscSF`s easily. 
453 454 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`, `PetscSFSetGraphWithPattern()` 455 @*/ 456 PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt ilocal[], PetscCopyMode localmode, PetscSFNode iremote[], PetscCopyMode remotemode) 457 { 458 PetscBool unique, contiguous; 459 460 PetscFunctionBegin; 461 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 462 if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4); 463 if (nleaves > 0) PetscAssertPointer(iremote, 6); 464 PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots); 465 PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves); 466 /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast 467 * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */ 468 PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode); 469 PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode); 470 471 if (sf->nroots >= 0) { /* Reset only if graph already set */ 472 PetscCall(PetscSFReset(sf)); 473 } 474 475 PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0)); 476 if (PetscDefined(USE_DEBUG)) { 477 PetscMPIInt size; 478 479 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 480 for (PetscInt i = 0; i < nleaves; i++) PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values"); 481 } 482 483 sf->nroots = nroots; 484 sf->nleaves = nleaves; 485 486 if (localmode == PETSC_COPY_VALUES && ilocal) { 487 PetscInt *tlocal = NULL; 488 489 
PetscCall(PetscMalloc1(nleaves, &tlocal)); 490 PetscCall(PetscArraycpy(tlocal, ilocal, nleaves)); 491 ilocal = tlocal; 492 } 493 if (remotemode == PETSC_COPY_VALUES) { 494 PetscSFNode *tremote = NULL; 495 496 PetscCall(PetscMalloc1(nleaves, &tremote)); 497 PetscCall(PetscArraycpy(tremote, iremote, nleaves)); 498 iremote = tremote; 499 } 500 501 if (nleaves && ilocal) { 502 PetscSFNode work; 503 504 PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work)); 505 PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique)); 506 unique = PetscNot(unique); 507 PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF"); 508 sf->minleaf = ilocal[0]; 509 sf->maxleaf = ilocal[nleaves - 1]; 510 contiguous = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1); 511 } else { 512 sf->minleaf = 0; 513 sf->maxleaf = nleaves - 1; 514 unique = PETSC_TRUE; 515 contiguous = PETSC_TRUE; 516 } 517 518 if (contiguous) { 519 if (localmode == PETSC_USE_POINTER) { 520 ilocal = NULL; 521 } else { 522 PetscCall(PetscFree(ilocal)); 523 } 524 } 525 sf->mine = ilocal; 526 if (localmode == PETSC_USE_POINTER) { 527 sf->mine_alloc = NULL; 528 } else { 529 sf->mine_alloc = ilocal; 530 } 531 if (PetscDefined(USE_DEBUG)) { 532 PetscMPIInt size; 533 534 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 535 for (PetscInt i = 0; i < nleaves; i++) PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values"); 536 } 537 sf->remote = iremote; 538 if (remotemode == PETSC_USE_POINTER) { 539 sf->remote_alloc = NULL; 540 } else { 541 sf->remote_alloc = iremote; 542 } 543 PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0)); 544 sf->graphset = PETSC_TRUE; 545 PetscFunctionReturn(PETSC_SUCCESS); 546 } 547 548 /*@ 549 PetscSFSetGraphWithPattern - Sets the 
graph of a `PetscSF` with a specific pattern

  Collective

  Input Parameters:
+ sf      - The `PetscSF`
. map     - Layout of roots over all processes (not used when pattern is `PETSCSF_PATTERN_ALLTOALL`)
- pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`

  Level: intermediate

  Notes:
  It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `root` and its `PetscLayout` is `map`.
  `n` and `N` are the local and global sizes of `root` respectively.

  With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on it, it will copy `root` to
  sequential vectors `leaves` on all MPI processes.

  With `PETSCSF_PATTERN_GATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on it, it will copy `root` to a
  sequential vector `leaves` on MPI rank 0.

  With `PETSCSF_PATTERN_ALLTOALL`, map is not used. Suppose NP is the size of `sf`'s communicator. The routine
  creates a graph where every MPI process has NP leaves and NP roots. On MPI rank i, its leaf j is connected to root i
  of rank j. Here 0 <=i,j<NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that it does
  not mean one can not send multiple items. One needs to create a new MPI datatype for the multiple data
  items with `MPI_Type_contiguous` and use that as the <unit> argument in the `PetscSF` routines. In this case, roots and leaves are symmetric.
575 576 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()` 577 @*/ 578 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern) 579 { 580 MPI_Comm comm; 581 PetscInt n, N, res[2]; 582 PetscMPIInt rank, size; 583 PetscSFType type; 584 585 PetscFunctionBegin; 586 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 587 PetscValidLogicalCollectiveEnum(sf, pattern, 3); 588 if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2); 589 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 590 PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern); 591 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 592 PetscCallMPI(MPI_Comm_size(comm, &size)); 593 594 if (pattern == PETSCSF_PATTERN_ALLTOALL) { 595 PetscInt sizei = size; 596 597 type = PETSCSFALLTOALL; 598 PetscCall(PetscLayoutCreate(comm, &sf->map)); 599 PetscCall(PetscLayoutSetLocalSize(sf->map, size)); 600 PetscCall(PetscLayoutSetSize(sf->map, PetscSqr(sizei))); 601 PetscCall(PetscLayoutSetUp(sf->map)); 602 } else { 603 PetscCall(PetscLayoutGetLocalSize(map, &n)); 604 PetscCall(PetscLayoutGetSize(map, &N)); 605 res[0] = n; 606 res[1] = -n; 607 /* Check if n are same over all ranks so that we can optimize it */ 608 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm)); 609 if (res[0] == -res[1]) { /* same n */ 610 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 611 } else { 612 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 613 } 614 PetscCall(PetscLayoutReference(map, &sf->map)); 615 } 616 PetscCall(PetscSFSetType(sf, type)); 617 618 sf->pattern = pattern; 619 sf->mine = NULL; /* Contiguous */ 620 621 /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 
622 Also set other easy stuff. 623 */ 624 if (pattern == PETSCSF_PATTERN_ALLGATHER) { 625 sf->nleaves = N; 626 sf->nroots = n; 627 sf->nranks = size; 628 sf->minleaf = 0; 629 sf->maxleaf = N - 1; 630 } else if (pattern == PETSCSF_PATTERN_GATHER) { 631 sf->nleaves = rank ? 0 : N; 632 sf->nroots = n; 633 sf->nranks = rank ? 0 : size; 634 sf->minleaf = 0; 635 sf->maxleaf = rank ? -1 : N - 1; 636 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 637 sf->nleaves = size; 638 sf->nroots = size; 639 sf->nranks = size; 640 sf->minleaf = 0; 641 sf->maxleaf = size - 1; 642 } 643 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 644 sf->graphset = PETSC_TRUE; 645 PetscFunctionReturn(PETSC_SUCCESS); 646 } 647 648 /*@ 649 PetscSFCreateInverseSF - given a `PetscSF` in which all roots have degree 1 (exactly one leaf), creates the inverse map 650 651 Collective 652 653 Input Parameter: 654 . sf - star forest to invert 655 656 Output Parameter: 657 . isf - inverse of `sf` 658 659 Level: advanced 660 661 Notes: 662 All roots must have degree 1. 663 664 The local space may be a permutation, but cannot be sparse. 
665 666 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFSetGraph()` 667 @*/ 668 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf) 669 { 670 PetscMPIInt rank; 671 PetscInt i, nroots, nleaves, maxlocal, count, *newilocal; 672 const PetscInt *ilocal; 673 PetscSFNode *roots, *leaves; 674 675 PetscFunctionBegin; 676 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 677 PetscSFCheckGraphSet(sf, 1); 678 PetscAssertPointer(isf, 2); 679 680 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL)); 681 maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */ 682 683 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 684 PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves)); 685 for (i = 0; i < maxlocal; i++) { 686 leaves[i].rank = rank; 687 leaves[i].index = i; 688 } 689 for (i = 0; i < nroots; i++) { 690 roots[i].rank = -1; 691 roots[i].index = -1; 692 } 693 PetscCall(PetscSFReduceBegin(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE)); 694 PetscCall(PetscSFReduceEnd(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE)); 695 696 /* Check whether our leaves are sparse */ 697 for (i = 0, count = 0; i < nroots; i++) 698 if (roots[i].rank >= 0) count++; 699 if (count == nroots) newilocal = NULL; 700 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). 
*/
    PetscCall(PetscMalloc1(count, &newilocal));
    /* Keep only connected roots (rank >= 0), recording each one's original index in newilocal[] */
    for (i = 0, count = 0; i < nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
  PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
  PetscCall(PetscFree2(roots, leaves));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph

  Collective

  Input Parameters:
+ sf  - communication object to duplicate
- opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)

  Output Parameter:
. newsf - new communication object

  Level: beginner

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
{
  PetscSFType  type;
  MPI_Datatype dtype = MPIU_SCALAR;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveEnum(sf, opt, 2);
  PetscAssertPointer(newsf, 3);
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
  PetscCall(PetscSFGetType(sf, &type));
  if (type) PetscCall(PetscSFSetType(*newsf, type));
  (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf, 1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt           nroots, nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      /* General graph: deep-copy the leaf/root arrays into the new SF */
      PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
      PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
    } else {
      /* Pattern graphs (alltoall/allgather/gather) are reconstructed from the layout map */
      PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
    }
  }
  /* Since oldtype is committed, so is newtype, according to MPI */
  if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
  (*newsf)->vscat.bs     = sf->vscat.bs;
  (*newsf)->vscat.unit   = dtype;
  (*newsf)->vscat.to_n   = sf->vscat.to_n;
  (*newsf)->vscat.from_n = sf->vscat.from_n;
  /* Do not copy lsf. Build it on demand since it is rarely used */

#if defined(PETSC_HAVE_DEVICE)
  (*newsf)->backend              = sf->backend;
  (*newsf)->unknown_input_stream = sf->unknown_input_stream;
  (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGetGraph - Get the graph specifying a parallel star forest

  Not Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ nroots  - number of root vertices on the current process (these are possible targets for other MPI process to attach leaves)
. nleaves - number of leaf vertices on the current process, each of these references a root on any MPI process
. ilocal  - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage)
- iremote - remote locations of root vertices for each leaf on the current process

  Level: intermediate

  Notes:
  We are not currently requiring that the graph is set, thus returning `nroots` = -1 if it has not been set yet

  The returned `ilocal` and `iremote` might contain values in different order than the input ones in `PetscSFSetGraph()`

  Fortran Note:
  Use `PetscSFRestoreGraph()` when access to the arrays is no longer needed

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt *ilocal[], const PetscSFNode *iremote[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (sf->ops->GetGraph) {
    /* Let the implementation provide the graph when it has its own representation */
    PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote));
  } else {
    /* Generic case: hand back the cached arrays; each output is optional (may be NULL) */
    if (nroots) *nroots = sf->nroots;
    if (nleaves) *nleaves = sf->nleaves;
    if (ilocal) *ilocal = sf->mine;
    if (iremote) *iremote = sf->remote;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFGetLeafRange - Get the active leaf ranges

  Not Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ minleaf - minimum active leaf on this MPI process. Returns 0 if there are no leaves.
- maxleaf - maximum active leaf on this MPI process. Returns -1 if there are no leaves.

  Level: developer

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  /* Both outputs are optional; values are maintained by PetscSFSetGraph() */
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database

  Collective

  Input Parameters:
+ A    - the star forest
. obj  - Optional object that provides the prefix for the option names
- name - command line option

  Level: intermediate

  Note:
  See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat`

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1);
  PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFView - view a star forest

  Collective

  Input Parameters:
+ sf     - star forest
- viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`

  Level: beginner

.seealso: [](sec_petscsf), `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
{
  PetscBool         isascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* Default to stdout on the SF's communicator when no viewer is given */
  if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  PetscCheckSameComm(sf, 1, viewer, 2);
  if (sf->graphset) PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt    j;

    PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
    PetscCall(PetscViewerASCIIPushTab(viewer));
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
        PetscCall(PetscViewerASCIIPopTab(viewer));
        PetscFunctionReturn(PETSC_SUCCESS);
      }
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
      /* Synchronized printing so each rank's edges appear in rank order */
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%d\n", rank, sf->nroots, sf->nleaves, sf->nranks));
      for (PetscInt i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerGetFormat(viewer, &format));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks, *perm;

        /* Sort a copy of the rank list, tracking the permutation so edges can be printed per rank */
        PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
        PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
        for (PetscMPIInt i = 0; i < sf->nranks; i++) perm[i] = i;
        PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
        for (PetscMPIInt ii = 0; ii < sf->nranks; ii++) {
          PetscMPIInt i = perm[ii];

          PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
          for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
        }
        PetscCall(PetscFree2(tmpranks, perm));
      }
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
    }
    PetscCall(PetscViewerASCIIPopTab(viewer));
  }
  PetscTryTypeMethod(sf, View, viewer);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGetRootRanks - Get the root MPI ranks and number of vertices referenced by leaves on this process

  Not Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ nranks  - number of MPI processes referenced by local part
. ranks   - [`nranks`] array of MPI ranks
. roffset - [`nranks`+1] offset in `rmine` and `rremote` for each MPI process
.
rmine - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote MPI process, or `NULL`
- rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote MPI process, or `NULL`

  Level: developer

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetLeafRanks()`
@*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscMPIInt *nranks, const PetscMPIInt *ranks[], const PetscInt *roffset[], const PetscInt *rmine[], const PetscInt *rremote[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetRootRanks) {
    PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote);
  } else {
    /* The generic implementation */
    if (nranks) *nranks = sf->nranks;
    if (ranks) *ranks = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine) *rmine = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGetLeafRanks - Get leaf MPI ranks referencing roots on this process

  Not Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ niranks  - number of leaf MPI processes referencing roots on this process
. iranks   - [`niranks`] array of MPI ranks
. ioffset  - [`niranks`+1] offset in `irootloc` for each MPI process
- irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf MPI process

  Level: developer

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetRootRanks()`
@*/
PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscMPIInt *niranks, const PetscMPIInt *iranks[], const PetscInt *ioffset[], const PetscInt *irootloc[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetLeafRanks) {
    PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc);
  } else {
    /* No generic fallback: this query needs data only some SF types maintain */
    PetscSFType type;
    PetscCall(PetscSFGetType(sf, &type));
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Linear membership test: PETSC_TRUE iff needle occurs in list[0..n) */
static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
{
  PetscInt i;
  for (i = 0; i < n; i++) {
    if (needle == list[i]) return PETSC_TRUE;
  }
  return PETSC_FALSE;
}

/*@C
  PetscSFSetUpRanks - Set up data structures associated with MPI ranks; this is for internal use by `PetscSF` implementations.

  Collective

  Input Parameters:
+ sf     - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
- dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)

  Level: developer

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetRootRanks()`
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
{
  PetscHMapI    table;
  PetscHashIter pos;
  PetscMPIInt   size, groupsize, *groupranks, *ranks;
  PetscInt     *rcount;
  PetscInt      irank, sfnrank, ranksi;
  PetscMPIInt   i, orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
  /* Count leaves per referenced remote rank with a hash table: key = rank+1, value accumulates 1 per leaf */
  PetscCall(PetscHMapICreateWithSize(10, &table));
  for (i = 0; i < sf->nleaves; i++) {
    /* Log 1-based rank */
    PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
  }
  PetscCall(PetscHMapIGetSize(table, &sfnrank));
  PetscCall(PetscMPIIntCast(sfnrank, &sf->nranks));
  PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
  PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
  /* Flatten the hash table into parallel arrays ranks[] (keys) and rcount[] (leaf counts) */
  PetscHashIterBegin(table, pos);
  for (i = 0; i < sf->nranks; i++) {
    PetscHashIterGetKey(table, pos, ranksi);
    PetscCall(PetscMPIIntCast(ranksi, &ranks[i]));
    PetscHashIterGetVal(table, pos, rcount[i]);
    PetscHashIterNext(table, pos);
    ranks[i]--; /* Convert back to 0-based */
  }
  PetscCall(PetscHMapIDestroy(&table));

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group    group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;

    /* Translate every member of dgroup into the SF communicator's rank numbering */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
    PetscCall(PetscMalloc1(groupsize, &dgroupranks));
    PetscCall(PetscMalloc1(groupsize, &groupranks));
    for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
    if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree(dgroupranks));
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
    for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i], groupsize, groupranks)) break;
    }
    for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscMPIInt tmprank;
      PetscInt    tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  PetscCall(PetscFree(groupranks));
  /* Sort each partition by rank, carrying the leaf counts along */
  PetscCall(PetscSortMPIIntWithIntArray(sf->ndranks, ranks, rcount));
  if (rcount) PetscCall(PetscSortMPIIntWithIntArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
  /* Build CSR-style offsets; rcount[] is reset to serve as per-rank fill cursors below */
  sf->roffset[0] = 0;
  for (i = 0; i < sf->nranks; i++) {
    PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
    sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
    rcount[i]          = 0;
  }
  for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      PetscCall(PetscMPIIntCast(sf->remote[i].rank, &orank));
      PetscCall(PetscFindMPIInt(orank, sf->ndranks, sf->ranks, &irank));
      if (irank < 0) {
        /* Not among the distinguished ranks; search the non-distinguished partition */
        PetscCall(PetscFindMPIInt(orank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
        if (irank >= 0) irank += sf->ndranks;
      }
    }
    PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %d in array", orank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  PetscCall(PetscFree2(rcount, ranks));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGetGroups - gets incoming and outgoing process groups

  Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ incoming - group of origin processes for incoming edges (leaves that reference my roots)
- outgoing - group of destination processes for outgoing edges (roots that I reference)

  Level: developer

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
{
  MPI_Group group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt        i;
    const PetscInt *indegree;
    PetscMPIInt     rank, *outranks, *inranks, indegree0;
    PetscSFNode    *remote;
    PetscSF         bgcount;

    /* Compute the number of incoming ranks */
    /* Auxiliary SF: one root here, one leaf per referenced rank, so the root's degree counts incoming ranks */
    PetscCall(PetscMalloc1(sf->nranks, &remote));
    for (i = 0; i < sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
    PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
    PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
    PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
    /* Enumerate the incoming ranks */
    PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
    for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
    PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
    PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCall(PetscMPIIntCast(indegree[0], &indegree0));
    PetscCallMPI(MPI_Group_incl(group, indegree0, inranks, &sf->ingroup));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree2(inranks, outranks));
    PetscCall(PetscSFDestroy(&bgcount));
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* Outgoing group is simply the referenced root ranks already cached in sf->ranks */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
    PetscCallMPI(MPI_Group_free(&group));
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks

  Collective

  Input Parameter:
. sf - star forest

  Output Parameter:
. rsf - the star forest with a single root per MPI process to perform communications

  Level: developer

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()`
@*/
PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscAssertPointer(rsf, 2);
  /* Built lazily and cached on the SF */
  if (!sf->rankssf) {
    PetscSFNode       *rremotes;
    const PetscMPIInt *ranks;
    PetscMPIInt        nranks;

    PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
    PetscCall(PetscMalloc1(nranks, &rremotes));
    for (PetscInt i = 0; i < nranks; i++) {
      rremotes[i].rank  = ranks[i];
      rremotes[i].index = 0;
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf));
    PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER));
  }
  *rsf = sf->rankssf;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters

  Collective

  Input Parameter:
. sf - star forest that may contain roots with 0 or with more than 1 vertex

  Output Parameter:
. multi - star forest with split roots, such that each root has degree exactly 1 (has one leaf)

  Level: developer

  Note:
  In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
  directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
  edge, it is a candidate for future optimization that might involve its removal.

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscAssertPointer(multi, 2);
  if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
    *multi           = sf->multi;
    sf->multi->multi = sf->multi;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
    PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
    PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
    /* inoffset[] is the prefix sum of root degrees; fetch-and-add hands each leaf a unique slot within its root's range */
    PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
    inoffset[0] = 0;
    for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
    for (i = 0; i < maxlocal; i++) outones[i] = 1;
    PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
    PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
    for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
      for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
    }
    PetscCall(PetscMalloc1(sf->nleaves, &remote));
    for (i = 0; i < sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
    sf->multi->multi = sf->multi;
    PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
    if (sf->rankorder) { /* Sort the ranks */
      PetscMPIInt  rank;
      PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
      PetscSFNode *newremote;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
      for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
      PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
      for (i = 0; i < maxlocal; i++) outranks[i] = rank;
      PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
      PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i = 0; i < sf->nroots; i++) {
        PetscInt j;
        for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
        PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset));
        for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell each leaf its rank-ordered slot, then rebuild the multi graph with those slots */
      PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
      PetscCall(PetscMalloc1(sf->nleaves, &newremote));
      for (i = 0; i < sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
      PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
    }
    PetscCall(PetscFree3(inoffset, outones, outoffset));
  }
  *multi = sf->multi;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices

  Collective

  Input Parameters:
+ sf        - original star forest
. nselected - number of selected roots on this MPI process
- selected  - indices of the selected roots on this MPI process

  Output Parameter:
. esf - new star forest

  Level: advanced

  Note:
  To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
  be done by calling PetscSFGetGraph().

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt selected[], PetscSF *esf)
{
  PetscInt           i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
  const PetscInt    *ilocal;
  signed char       *rootdata, *leafdata, *leafmem;
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  MPI_Comm           comm;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  if (nselected) PetscAssertPointer(selected, 3);
  PetscAssertPointer(esf, 4);

  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));

  if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
    PetscBool dups;
    PetscCall(PetscCheckDupsInt(nselected, selected, &dups));
    PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups");
    for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
  }

  if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
  else {
    /* A generic version of creating embedded sf */
    PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
    maxlocal = maxleaf - minleaf + 1;
    PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
    /* Shift the buffer so leafdata can be indexed by absolute leaf index in [minleaf,maxleaf] */
    leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf);
    /* Tag selected roots and bcast to leaves */
    for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
    PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));

    /* Build esf with leaves that are still connected */
    esf_nleaves = 0;
    for (i = 0; i < nleaves; i++) {
      j = ilocal ? ilocal[i] : i;
      /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
         with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
      */
      esf_nleaves += (leafdata[j] ? 1 : 0);
    }
    PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
    PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
    for (i = n = 0; i < nleaves; i++) {
      j = ilocal ? ilocal[i] : i;
      if (leafdata[j]) {
        new_ilocal[n]        = j;
        new_iremote[n].rank  = iremote[i].rank;
        new_iremote[n].index = iremote[i].index;
        ++n;
      }
    }
    PetscCall(PetscSFCreate(comm, esf));
    PetscCall(PetscSFSetFromOptions(*esf));
    PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
    PetscCall(PetscFree2(rootdata, leafmem));
  }
  PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices

  Collective

  Input Parameters:
+ sf        - original star forest
. nselected - number of selected leaves on this MPI process
- selected  - indices of the selected leaves on this MPI process

  Output Parameter:
.
newsf - new star forest 1418 1419 Level: advanced 1420 1421 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1422 @*/ 1423 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt selected[], PetscSF *newsf) 1424 { 1425 const PetscSFNode *iremote; 1426 PetscSFNode *new_iremote; 1427 const PetscInt *ilocal; 1428 PetscInt i, nroots, *leaves, *new_ilocal; 1429 MPI_Comm comm; 1430 1431 PetscFunctionBegin; 1432 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1433 PetscSFCheckGraphSet(sf, 1); 1434 if (nselected) PetscAssertPointer(selected, 3); 1435 PetscAssertPointer(newsf, 4); 1436 1437 /* Uniq selected[] and put results in leaves[] */ 1438 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1439 PetscCall(PetscMalloc1(nselected, &leaves)); 1440 PetscCall(PetscArraycpy(leaves, selected, nselected)); 1441 PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves)); 1442 PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves); 1443 1444 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1445 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf); 1446 else { 1447 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote)); 1448 PetscCall(PetscMalloc1(nselected, &new_ilocal)); 1449 PetscCall(PetscMalloc1(nselected, &new_iremote)); 1450 for (i = 0; i < nselected; ++i) { 1451 const PetscInt l = leaves[i]; 1452 new_ilocal[i] = ilocal ? 
ilocal[l] : l; 1453 new_iremote[i].rank = iremote[l].rank; 1454 new_iremote[i].index = iremote[l].index; 1455 } 1456 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf)); 1457 PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1458 } 1459 PetscCall(PetscFree(leaves)); 1460 PetscFunctionReturn(PETSC_SUCCESS); 1461 } 1462 1463 /*@C 1464 PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()` 1465 1466 Collective 1467 1468 Input Parameters: 1469 + sf - star forest on which to communicate 1470 . unit - data type associated with each node 1471 . rootdata - buffer to broadcast 1472 - op - operation to use for reduction 1473 1474 Output Parameter: 1475 . leafdata - buffer to be reduced with values from each leaf's respective root 1476 1477 Level: intermediate 1478 1479 Note: 1480 When PETSc is configured with device support, it will use `PetscGetMemType()` to determine whether the given data pointers 1481 are host pointers or device pointers, which may incur a noticeable cost. If you already knew the memory type, you should 1482 use `PetscSFBcastWithMemTypeBegin()` instead. 

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
@*/
PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
{
  PetscMemType rootmtype, leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  /* Event logging is suppressed when this SF is driven by a VecScatter (which logs its own events) */
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
  /* Detect host vs device buffers, then dispatch to the type-specific implementation */
  PetscCall(PetscGetMemType(rootdata, &rootmtype));
  PetscCall(PetscGetMemType(leafdata, &leafmtype));
  PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call
  to `PetscSFBcastEnd()`

  Collective

  Input Parameters:
+ sf        - star forest on which to communicate
. unit      - data type associated with each node
. rootmtype - memory type of `rootdata`
. rootdata  - buffer to broadcast
. leafmtype - memory type of `leafdata`
- op        - operation to use for reduction

  Output Parameter:
. leafdata - buffer to be reduced with values from each leaf's respective root

  Level: intermediate

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
@*/
PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
  /* Caller-supplied memory types avoid the PetscGetMemType() lookups done by PetscSFBcastBegin() */
  PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()`

  Collective

  Input Parameters:
+ sf       - star forest
. unit     - data type
. rootdata - buffer to broadcast
- op       - operation to use for reduction

  Output Parameter:
. leafdata - buffer to be reduced with values from each leaf's respective root

  Level: intermediate

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
@*/
PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
  PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFReduceBegin - begin reduction (communication) of `leafdata` into `rootdata`, to be completed with call to `PetscSFReduceEnd()`

  Collective

  Input Parameters:
+ sf       - star forest
. unit     - data type
. leafdata - values to reduce (communicate)
- op       - reduction operation

  Output Parameter:
. rootdata - result of reduction of values (`leafdata`) from all leaves to each root

  Level: intermediate

  Note:
  When PETSc is configured with device support, it will use `PetscGetMemType()` to determine whether the given data pointers
  are host pointers or device pointers, which may incur a noticeable cost. If you already knew the memory type, you should
  use `PetscSFReduceWithMemTypeBegin()` instead.
.seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()`, `MPI_Datatype`
@*/
PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
{
  PetscMemType rootmtype, leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
  PetscCall(PetscGetMemType(rootdata, &rootmtype));
  PetscCall(PetscGetMemType(leafdata, &leafmtype));
  /* NOTE(review): direct ops-> dispatch here, whereas sibling routines use PetscUseTypeMethod();
     difference is only in the error raised when the type does not implement the method */
  PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFReduceWithMemTypeBegin - begin reduction of `leafdata` into `rootdata` with explicit memory types, to be completed with call to `PetscSFReduceEnd()`

  Collective

  Input Parameters:
+ sf        - star forest
. unit      - data type
. leafmtype - memory type of `leafdata`
. leafdata  - values to reduce
. rootmtype - memory type of `rootdata`
- op        - reduction operation

  Output Parameter:
. rootdata - result of reduction of values from all leaves of each root

  Level: intermediate

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()`
@*/
PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
  /* caller-provided memory types are forwarded directly; no PetscGetMemType() query here */
  PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()`

  Collective

  Input Parameters:
+ sf       - star forest
. unit     - data type
. leafdata - values to reduce
- op       - reduction operation

  Output Parameter:
. rootdata - result of reduction of values from all leaves of each root

  Level: intermediate

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()`
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0));
  PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFFetchAndOpBegin - begin operation that fetches values from `rootdata` and updates it atomically by applying operation using `leafdata`,
  to be completed with `PetscSFFetchAndOpEnd()`

  Collective

  Input Parameters:
+ sf       - star forest
. unit     - data type
. leafdata - leaf values to use in reduction
- op       - operation to use for reduction

  Output Parameters:
+ rootdata   - root values to be updated, input state is seen by first process to perform an update
- leafupdate - state at each leaf's respective root immediately prior to atomic update

  Level: advanced

  Note:
  The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
  might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
  not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
  integers.
.seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscMemType rootmtype, leafmtype, leafupdatemtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  PetscCall(PetscGetMemType(rootdata, &rootmtype));
  PetscCall(PetscGetMemType(leafdata, &leafmtype));
  PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
  /* the implementations only handle leafdata and leafupdate residing in the same memory space */
  PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
  PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by
  applying operation using `leafdata`, to be completed with `PetscSFFetchAndOpEnd()`

  Collective

  Input Parameters:
+ sf              - star forest
. unit            - data type
. rootmtype       - memory type of `rootdata`
. leafmtype       - memory type of `leafdata`
. leafdata        - leaf values to use in reduction
. leafupdatemtype - memory type of `leafupdate`
- op              - operation to use for reduction

  Output Parameters:
+ rootdata   - root values to be updated, input state is seen by first process to perform an update
- leafupdate - state at each leaf's respective root immediately prior to atomic update

  Level: advanced

  Note:
  See `PetscSFFetchAndOpBegin()` for more details.

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()`
@*/
PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
  PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()`
  to fetch values from roots and update atomically by applying operation using `leafdata`

  Collective

  Input Parameters:
+ sf       - star forest
. unit     - data type
. leafdata - leaf values to use in reduction
- op       - operation to use for reduction

  Output Parameters:
+ rootdata   - root values to be updated, input state is seen by first process to perform an update
- leafupdate - state at each leaf's respective root immediately prior to atomic update

  Level: advanced

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()`
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
  PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFComputeDegreeBegin - begin computation of the degree of each root vertex, to be completed with `PetscSFComputeDegreeEnd()`

  Collective

  Input Parameter:
. sf - star forest

  Output Parameter:
. degree - degree (the number of leaves) of each root vertex

  Level: advanced

  Note:
  The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
.seealso: [](sec_petscsf), `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt *degree[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscAssertPointer(degree, 2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;

    PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf - sf->minleaf + 1;
    PetscCall(PetscMalloc1(nroots, &sf->degree));
    PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i = 0; i < nroots; i++) sf->degree[i] = 0;
    for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
    /* reduce ones from all leaves into the roots; degreetmp is shifted so leaf indices index it directly */
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
  }
  *degree = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`

  Collective

  Input Parameter:
. sf - star forest

  Output Parameter:
. degree - degree of each root vertex

  Level: developer

  Note:
  The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt *degree[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscAssertPointer(degree, 2);
  if (!sf->degreeknown) {
    PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
    PetscCall(PetscFree(sf->degreetmp));
    sf->degreeknown = PETSC_TRUE; /* cached; subsequent Begin/End pairs return the stored array */
  }
  *degree = sf->degree;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`).
  Each multi-root is assigned index of the corresponding original root.

  Collective

  Input Parameters:
+ sf     - star forest
- degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()` and `PetscSFComputeDegreeEnd()`

  Output Parameters:
+ nMultiRoots             - (optional) number of multi-roots (roots of multi-`PetscSF`)
- multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots`

  Level: developer

  Note:
  The returned array `multiRootsOrigNumbering` should be destroyed with `PetscFree()` when no longer needed.

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
@*/
PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
{
  PetscSF  msf;
  PetscInt k = 0, nroots, nmroots;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  if (nroots) PetscAssertPointer(degree, 2);
  if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3);
  PetscAssertPointer(multiRootsOrigNumbering, 4);
  PetscCall(PetscSFGetMultiSF(sf, &msf));
  PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
  PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
  /* each original root i contributes degree[i] consecutive multi-roots, all mapped back to i */
  for (PetscInt i = 0; i < nroots; i++) {
    if (!degree[i]) continue;
    for (PetscInt j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
  }
  PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail");
  if (nMultiRoots) *nMultiRoots = nmroots;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`

  Collective

  Input Parameters:
+ sf       - star forest
. unit     - data type
- leafdata - leaf data to gather to roots

  Output Parameter:
. multirootdata - root buffer to gather into, amount of space per root is equal to its degree (which is the number of leaves that root has)

  Level: intermediate

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
@*/
PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  /* a gather is a reduce with MPI_REPLACE on the multi-SF, whose roots have one slot per leaf */
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()`

  Collective

  Input Parameters:
+ sf       - star forest
. unit     - data type
- leafdata - leaf data to gather to roots

  Output Parameter:
. multirootdata - root buffer to gather into, amount of space per root is equal to its degree (which is the number of leaves that root has)

  Level: intermediate

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
@*/
PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()`

  Collective

  Input Parameters:
+ sf            - star forest
. unit          - data type
- multirootdata - root buffer to send to each leaf, one unit of data is provided to each leaf thus the amount of space per root is equal to its degree (which is the number of leaves that root has)

  Output Parameter:
. leafdata - leaf data to be updated with personal data from each respective root

  Level: intermediate

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  /* a scatter is a broadcast with MPI_REPLACE on the multi-SF */
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`

  Collective

  Input Parameters:
+ sf            - star forest
. unit          - data type
- multirootdata - root buffer to send to each leaf, one unit of data per leaf

  Output Parameter:
. leafdata - leaf data to be updated with personal data from each respective root

  Level: intermediate

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* In debug builds, verify that the leaves of sf are unique, i.e., sf is a true star forest; no-op in optimized builds */
static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
{
  PetscInt        i, n, nleaves;
  const PetscInt *ilocal = NULL;
  PetscHSetI      seen;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) {
    PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
    PetscCall(PetscHSetICreate(&seen));
    for (i = 0; i < nleaves; i++) {
      const PetscInt leaf = ilocal ? ilocal[i] : i;
      PetscCall(PetscHSetIAdd(seen, leaf));
    }
    /* duplicates collapse in the hash set, so a size mismatch means repeated leaves */
    PetscCall(PetscHSetIGetSize(seen, &n));
    PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
    PetscCall(PetscHSetIDestroy(&seen));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view

  Input Parameters:
+ sfA - The first `PetscSF`
- sfB - The second `PetscSF`

  Output Parameter:
. sfBA - The composite `PetscSF`

  Level: developer

  Notes:
  Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots.
  `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds
  a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected
  nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a
  `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on `sfB`, on connected nodes.

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA, *remotePointsB;
  PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
  const PetscInt    *localPointsA, *localPointsB;
  PetscInt          *localPointsBA;
  PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
  PetscBool          denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sfA, 1);
  PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
  PetscSFCheckGraphSet(sfB, 2);
  PetscCheckSameComm(sfA, 1, sfB, 2);
  PetscAssertPointer(sfBA, 3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
  /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size
     numRootsB; otherwise, garbage will be broadcasted.
     Example (comm size = 1):
     sfA: 0 <- (0, 0)
     sfB: 100 <- (0, 0)
          101 <- (0, 1)
     Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target
     of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would
     receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on
     remotePointsA; if not recast, point 101 would receive a garbage value. */
  PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
  for (i = 0; i < numRootsB; i++) {
    /* rank/index of -1 marks "not connected through sfA" */
    reorderedRemotePointsA[i].rank  = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  for (i = 0; i < numLeavesA; i++) {
    PetscInt localp = localPointsA ? localPointsA[i] : i;

    if (localp >= numRootsB) continue; /* leaf of sfA outside sfB's root space cannot be part of a path */
    reorderedRemotePointsA[localp] = remotePointsA[i];
  }
  remotePointsA = reorderedRemotePointsA;
  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
  for (i = 0; i < maxleaf - minleaf + 1; i++) {
    leafdataB[i].rank  = -1;
    leafdataB[i].index = -1;
  }
  /* push sfA's root locations down through sfB to its leaves */
  PetscCall(PetscSFBcastBegin(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
  PetscCall(PetscFree(reorderedRemotePointsA));

  /* if every leaf of sfB is connected and ilocal is NULL, leafdataB can be used as-is (dense case) */
  denseB = (PetscBool)!localPointsB;
  for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership transferred to the new SF below */
  } else {
    PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
    PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
    for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l - minleaf].rank == -1) continue; /* drop leaves with no path back to an sfA root */
      remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
      localPointsBA[numLeavesBA]  = l;
      numLeavesBA++;
    }
    PetscCall(PetscFree(leafdataB));
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one

  Input Parameters:
+ sfA - The first `PetscSF`
- sfB - The second `PetscSF`

  Output Parameter:
. sfBA - The composite `PetscSF`.

  Level: developer

  Notes:
  Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
  second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected.

  `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds
  a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected
  roots are not in `sfBA`. Doing a `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()` and `PetscSFBcastEnd()`
  on `sfA`, then
  a `PetscSFReduceBegin()` and `PetscSFReduceEnd()` on `sfB`, on connected roots.
.seealso: [](sec_petscsf), `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA, *remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA, *localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
  MPI_Op             op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sfA, 1);
  PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
  PetscSFCheckGraphSet(sfB, 2);
  PetscCheckSameComm(sfA, 1, sfB, 2);
  PetscAssertPointer(sfBA, 3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
  if (iswin) op = MPI_REPLACE;
#endif

  /* scatter sfA's root locations into sfB's leaf space; -1 marks slots not covered by sfA */
  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
  for (i = 0; i < maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank  = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i = 0; i < numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; /* outside sfB's leaf range, no path */
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    for (i = 0; i < numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
  PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
  for (i = 0; i < numRootsB; i++) {
    remotePointsBA[i].rank  = -1;
    remotePointsBA[i].index = -1;
  }

  /* reduce through sfB so each of its roots learns which sfA root it connects to */
  PetscCall(PetscSFReduceBegin(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
  PetscCall(PetscSFReduceEnd(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
  PetscCall(PetscFree(reorderedRemotePointsA));
  /* compact in place, dropping unconnected roots (rank still -1) */
  for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA]        = i;
    numLeavesBA++;
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`

  Input Parameter:
. sf - The global `PetscSF`

  Output Parameter:
. out - The local `PetscSF`

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`
*/
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
{
  MPI_Comm           comm;
  PetscMPIInt        myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
  PetscSFNode       *liremote;
  PetscSF            lsf;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
  else {
    PetscMPIInt irank;

    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
    PetscCallMPI(MPI_Comm_rank(comm, &myrank));

    /* Find out local edges and build a local SF */
    PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
    /* first pass: count edges whose remote root lives on this rank */
    for (i = lnleaves = 0; i < nleaves; i++) {
      PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
      if (irank == myrank) lnleaves++;
    }
    PetscCall(PetscMalloc1(lnleaves, &lilocal));
    PetscCall(PetscMalloc1(lnleaves, &liremote));

    /* second pass: copy the intra-process edges */
    for (i = j = 0; i < nleaves; i++) {
      PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
      if (irank == myrank) {
        lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
    PetscCall(PetscSFSetFromOptions(lsf));
    PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(lsf));
    *out = lsf;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
{
  PetscMemType rootmtype, leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscCall(PetscGetMemType(rootdata, &rootmtype));
  PetscCall(PetscGetMemType(leafdata, &leafmtype));
  PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
  PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFConcatenate - concatenate multiple `PetscSF` into a new `PetscSF`

  Input Parameters:
+ comm        - the communicator
. nsfs        - the number of input `PetscSF`
. sfs         - the array of input `PetscSF`
. rootMode    - the root mode specifying how roots are handled
- leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage

  Output Parameter:
. newsf - The resulting `PetscSF`

  Level: advanced

  Notes:
  The communicator of all `PetscSF`s in `sfs` must be comm.

  Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order.
2320 2321 The offsets in `leafOffsets` are added to the original leaf indices. 2322 2323 If all input `PetscSF`s use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well. 2324 In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`. 2325 2326 If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s. 2327 In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs). 2328 2329 All root modes retain the essential connectivity condition. 2330 2331 If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`. 2332 2333 Parameter `rootMode` controls how the input root spaces are combined. 2334 For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode) 2335 and is also the same in the output `PetscSF`. 2336 2337 For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined. 2338 `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally; 2339 roots of sfs[0], sfs[1], sfs[2], ... are joined on each MPI process separately, ordered by input `PetscSF` and original local index, and renumbered contiguously. 2340 `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally; 2341 roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously; 2342 the original root MPI processes are ignored. 2343 For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, 2344 the output `PetscSF`'s root layout is such that the local number of roots is a sum of the input `PetscSF`'s local numbers of roots on each MPI process 2345 to keep the load balancing. 
2346 However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different MPI processes. 2347 2348 Example: 2349 We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running 2350 .vb 2351 make -C $PETSC_DIR/src/vec/is/sf/tests ex18 2352 for m in {local,global,shared}; do 2353 mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view 2354 done 2355 .ve 2356 we generate two identical `PetscSF`s sf_0 and sf_1, 2357 .vb 2358 PetscSF Object: sf_0 2 MPI processes 2359 type: basic 2360 rank #leaves #roots 2361 [ 0] 4 2 2362 [ 1] 4 2 2363 leaves roots roots in global numbering 2364 ( 0, 0) <- ( 0, 0) = 0 2365 ( 0, 1) <- ( 0, 1) = 1 2366 ( 0, 2) <- ( 1, 0) = 2 2367 ( 0, 3) <- ( 1, 1) = 3 2368 ( 1, 0) <- ( 0, 0) = 0 2369 ( 1, 1) <- ( 0, 1) = 1 2370 ( 1, 2) <- ( 1, 0) = 2 2371 ( 1, 3) <- ( 1, 1) = 3 2372 .ve 2373 and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\: 2374 .vb 2375 rootMode = local: 2376 PetscSF Object: result_sf 2 MPI processes 2377 type: basic 2378 rank #leaves #roots 2379 [ 0] 8 4 2380 [ 1] 8 4 2381 leaves roots roots in global numbering 2382 ( 0, 0) <- ( 0, 0) = 0 2383 ( 0, 1) <- ( 0, 1) = 1 2384 ( 0, 2) <- ( 1, 0) = 4 2385 ( 0, 3) <- ( 1, 1) = 5 2386 ( 0, 4) <- ( 0, 2) = 2 2387 ( 0, 5) <- ( 0, 3) = 3 2388 ( 0, 6) <- ( 1, 2) = 6 2389 ( 0, 7) <- ( 1, 3) = 7 2390 ( 1, 0) <- ( 0, 0) = 0 2391 ( 1, 1) <- ( 0, 1) = 1 2392 ( 1, 2) <- ( 1, 0) = 4 2393 ( 1, 3) <- ( 1, 1) = 5 2394 ( 1, 4) <- ( 0, 2) = 2 2395 ( 1, 5) <- ( 0, 3) = 3 2396 ( 1, 6) <- ( 1, 2) = 6 2397 ( 1, 7) <- ( 1, 3) = 7 2398 2399 rootMode = global: 2400 PetscSF Object: result_sf 2 MPI processes 2401 type: basic 2402 rank #leaves #roots 2403 [ 0] 8 4 2404 [ 1] 8 4 2405 leaves roots roots in global numbering 2406 ( 0, 0) <- ( 0, 0) = 0 2407 ( 0, 1) <- ( 0, 1) = 1 2408 ( 0, 2) <- ( 0, 2) = 2 2409 ( 0, 3) <- ( 0, 3) = 3 2410 ( 0, 4) <- ( 1, 0) = 4 2411 ( 0, 5) <- ( 1, 1) = 5 2412 ( 0, 6) <- ( 
1, 2) = 6 2413 ( 0, 7) <- ( 1, 3) = 7 2414 ( 1, 0) <- ( 0, 0) = 0 2415 ( 1, 1) <- ( 0, 1) = 1 2416 ( 1, 2) <- ( 0, 2) = 2 2417 ( 1, 3) <- ( 0, 3) = 3 2418 ( 1, 4) <- ( 1, 0) = 4 2419 ( 1, 5) <- ( 1, 1) = 5 2420 ( 1, 6) <- ( 1, 2) = 6 2421 ( 1, 7) <- ( 1, 3) = 7 2422 2423 rootMode = shared: 2424 PetscSF Object: result_sf 2 MPI processes 2425 type: basic 2426 rank #leaves #roots 2427 [ 0] 8 2 2428 [ 1] 8 2 2429 leaves roots roots in global numbering 2430 ( 0, 0) <- ( 0, 0) = 0 2431 ( 0, 1) <- ( 0, 1) = 1 2432 ( 0, 2) <- ( 1, 0) = 2 2433 ( 0, 3) <- ( 1, 1) = 3 2434 ( 0, 4) <- ( 0, 0) = 0 2435 ( 0, 5) <- ( 0, 1) = 1 2436 ( 0, 6) <- ( 1, 0) = 2 2437 ( 0, 7) <- ( 1, 1) = 3 2438 ( 1, 0) <- ( 0, 0) = 0 2439 ( 1, 1) <- ( 0, 1) = 1 2440 ( 1, 2) <- ( 1, 0) = 2 2441 ( 1, 3) <- ( 1, 1) = 3 2442 ( 1, 4) <- ( 0, 0) = 0 2443 ( 1, 5) <- ( 0, 1) = 1 2444 ( 1, 6) <- ( 1, 0) = 2 2445 ( 1, 7) <- ( 1, 1) = 3 2446 .ve 2447 2448 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode` 2449 @*/ 2450 PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf) 2451 { 2452 PetscInt i, s, nLeaves, nRoots; 2453 PetscInt *leafArrayOffsets; 2454 PetscInt *ilocal_new; 2455 PetscSFNode *iremote_new; 2456 PetscBool all_ilocal_null = PETSC_FALSE; 2457 PetscLayout glayout = NULL; 2458 PetscInt *gremote = NULL; 2459 PetscMPIInt rank, size; 2460 2461 PetscFunctionBegin; 2462 if (PetscDefined(USE_DEBUG)) { 2463 PetscSF dummy; /* just to have a PetscObject on comm for input validation */ 2464 2465 PetscCall(PetscSFCreate(comm, &dummy)); 2466 PetscValidLogicalCollectiveInt(dummy, nsfs, 2); 2467 PetscAssertPointer(sfs, 3); 2468 for (i = 0; i < nsfs; i++) { 2469 PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3); 2470 PetscCheckSameComm(dummy, 1, sfs[i], 3); 2471 } 2472 PetscValidLogicalCollectiveEnum(dummy, rootMode, 4); 2473 if 
(leafOffsets) PetscAssertPointer(leafOffsets, 5); 2474 PetscAssertPointer(newsf, 6); 2475 PetscCall(PetscSFDestroy(&dummy)); 2476 } 2477 if (!nsfs) { 2478 PetscCall(PetscSFCreate(comm, newsf)); 2479 PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 2480 PetscFunctionReturn(PETSC_SUCCESS); 2481 } 2482 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2483 PetscCallMPI(MPI_Comm_size(comm, &size)); 2484 2485 /* Calculate leaf array offsets */ 2486 PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets)); 2487 leafArrayOffsets[0] = 0; 2488 for (s = 0; s < nsfs; s++) { 2489 PetscInt nl; 2490 2491 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL)); 2492 leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl; 2493 } 2494 nLeaves = leafArrayOffsets[nsfs]; 2495 2496 /* Calculate number of roots */ 2497 switch (rootMode) { 2498 case PETSCSF_CONCATENATE_ROOTMODE_SHARED: { 2499 PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL)); 2500 if (PetscDefined(USE_DEBUG)) { 2501 for (s = 1; s < nsfs; s++) { 2502 PetscInt nr; 2503 2504 PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL)); 2505 PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots); 2506 } 2507 } 2508 } break; 2509 case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: { 2510 /* Calculate also global layout in this case */ 2511 PetscInt *nls; 2512 PetscLayout *lts; 2513 PetscInt **inds; 2514 PetscInt j; 2515 PetscInt rootOffset = 0; 2516 2517 PetscCall(PetscCalloc3(nsfs, <s, nsfs, &nls, nsfs, &inds)); 2518 PetscCall(PetscLayoutCreate(comm, &glayout)); 2519 glayout->bs = 1; 2520 glayout->n = 0; 2521 glayout->N = 0; 2522 for (s = 0; s < nsfs; s++) { 2523 PetscCall(PetscSFGetGraphLayout(sfs[s], <s[s], &nls[s], NULL, &inds[s])); 2524 glayout->n += lts[s]->n; 2525 glayout->N += lts[s]->N; 2526 } 2527 
PetscCall(PetscLayoutSetUp(glayout)); 2528 PetscCall(PetscMalloc1(nLeaves, &gremote)); 2529 for (s = 0, j = 0; s < nsfs; s++) { 2530 for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset; 2531 rootOffset += lts[s]->N; 2532 PetscCall(PetscLayoutDestroy(<s[s])); 2533 PetscCall(PetscFree(inds[s])); 2534 } 2535 PetscCall(PetscFree3(lts, nls, inds)); 2536 nRoots = glayout->N; 2537 } break; 2538 case PETSCSF_CONCATENATE_ROOTMODE_LOCAL: 2539 /* nRoots calculated later in this case */ 2540 break; 2541 default: 2542 SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode); 2543 } 2544 2545 if (!leafOffsets) { 2546 all_ilocal_null = PETSC_TRUE; 2547 for (s = 0; s < nsfs; s++) { 2548 const PetscInt *ilocal; 2549 2550 PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL)); 2551 if (ilocal) { 2552 all_ilocal_null = PETSC_FALSE; 2553 break; 2554 } 2555 } 2556 PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL"); 2557 } 2558 2559 /* Renumber and concatenate local leaves */ 2560 ilocal_new = NULL; 2561 if (!all_ilocal_null) { 2562 PetscCall(PetscMalloc1(nLeaves, &ilocal_new)); 2563 for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1; 2564 for (s = 0; s < nsfs; s++) { 2565 const PetscInt *ilocal; 2566 PetscInt *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]); 2567 PetscInt i, nleaves_l; 2568 2569 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL)); 2570 for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? 
ilocal[i] : i) + leafOffsets[s]; 2571 } 2572 } 2573 2574 /* Renumber and concatenate remote roots */ 2575 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) { 2576 PetscInt rootOffset = 0; 2577 2578 PetscCall(PetscMalloc1(nLeaves, &iremote_new)); 2579 for (i = 0; i < nLeaves; i++) { 2580 iremote_new[i].rank = -1; 2581 iremote_new[i].index = -1; 2582 } 2583 for (s = 0; s < nsfs; s++) { 2584 PetscInt i, nl, nr; 2585 PetscSF tmp_sf; 2586 const PetscSFNode *iremote; 2587 PetscSFNode *tmp_rootdata; 2588 PetscSFNode *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]); 2589 2590 PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote)); 2591 PetscCall(PetscSFCreate(comm, &tmp_sf)); 2592 /* create helper SF with contiguous leaves */ 2593 PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 2594 PetscCall(PetscSFSetUp(tmp_sf)); 2595 PetscCall(PetscMalloc1(nr, &tmp_rootdata)); 2596 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) { 2597 for (i = 0; i < nr; i++) { 2598 tmp_rootdata[i].index = i + rootOffset; 2599 tmp_rootdata[i].rank = rank; 2600 } 2601 rootOffset += nr; 2602 } else { 2603 for (i = 0; i < nr; i++) { 2604 tmp_rootdata[i].index = i; 2605 tmp_rootdata[i].rank = rank; 2606 } 2607 } 2608 PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2609 PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2610 PetscCall(PetscSFDestroy(&tmp_sf)); 2611 PetscCall(PetscFree(tmp_rootdata)); 2612 } 2613 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above 2614 2615 /* Build the new SF */ 2616 PetscCall(PetscSFCreate(comm, newsf)); 2617 PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER)); 2618 } else { 2619 /* Build the new SF */ 2620 
PetscCall(PetscSFCreate(comm, newsf)); 2621 PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote)); 2622 } 2623 PetscCall(PetscSFSetUp(*newsf)); 2624 PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view")); 2625 PetscCall(PetscLayoutDestroy(&glayout)); 2626 PetscCall(PetscFree(gremote)); 2627 PetscCall(PetscFree(leafArrayOffsets)); 2628 PetscFunctionReturn(PETSC_SUCCESS); 2629 } 2630 2631 /*@ 2632 PetscSFRegisterPersistent - Register root and leaf data as memory regions that will be used for repeated `PetscSF` communications. 2633 2634 Collective 2635 2636 Input Parameters: 2637 + sf - star forest 2638 . unit - the data type contained within `rootdata` and `leafdata` 2639 . rootdata - root data that will be used for multiple `PetscSF` communications 2640 - leafdata - leaf data that will be used for multiple `PetscSF` communications 2641 2642 Level: advanced 2643 2644 Notes: 2645 Implementations of `PetscSF` can make optimizations 2646 for repeated communication using the same memory regions, but these optimizations 2647 can be unsound if `rootdata` or `leafdata` is deallocated and the `PetscSF` is not informed. 2648 The intended pattern is 2649 2650 .vb 2651 PetscMalloc2(nroots, &rootdata, nleaves, &leafdata); 2652 2653 PetscSFRegisterPersistent(sf, unit, rootdata, leafdata); 2654 // repeated use of rootdata and leafdata will now be optimized 2655 2656 PetscSFBcastBegin(sf, unit, rootdata, leafdata, MPI_REPLACE); 2657 PetscSFBcastEnd(sf, unit, rootdata, leafdata, MPI_REPLACE); 2658 // ... 2659 PetscSFReduceBegin(sf, unit, leafdata, rootdata, MPI_SUM); 2660 PetscSFReduceEnd(sf, unit, leafdata, rootdata, MPI_SUM); 2661 // ... 

  // rootdata and leafdata must be deregistered before freeing
  // skipping this can lead to undefined behavior including
  // deadlocks
  PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata);

  // it is now safe to free rootdata and leafdata
  PetscFree2(rootdata, leafdata);
.ve

  If you do not register `rootdata` and `leafdata` it will not cause an error,
  but optimizations that reduce the setup time for each communication cannot be
  made. Currently, the only implementation of `PetscSF` that benefits from
  `PetscSFRegisterPersistent()` is `PETSCSFWINDOW`. For the default
  `PETSCSFBASIC` there is no benefit to using `PetscSFRegisterPersistent()`.

.seealso: [](sec_petscsf), `PetscSF`, `PETSCSFWINDOW`, `PetscSFDeregisterPersistent()`
@*/
PetscErrorCode PetscSFRegisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* Dispatch to the implementation if it provides this capability; silently a no-op otherwise (PetscTryMethod) */
  PetscTryMethod(sf, "PetscSFRegisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFDeregisterPersistent - Signal that repeated usage of `rootdata` and `leafdata` for `PetscSF` communication has concluded.

  Collective

  Input Parameters:
+ sf       - star forest
. unit     - the data type contained within the `rootdata` and `leafdata`
. rootdata - root data that was previously registered with `PetscSFRegisterPersistent()`
- leafdata - leaf data that was previously registered with `PetscSFRegisterPersistent()`

  Level: advanced

  Note:
  See `PetscSFRegisterPersistent()` for when and how to use this function.

.seealso: [](sec_petscsf), `PetscSF`, `PETSCSFWINDOW`, `PetscSFRegisterPersistent()`
@*/
PetscErrorCode PetscSFDeregisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* Dispatch to the implementation if it provides this capability; silently a no-op otherwise (PetscTryMethod) */
  PetscTryMethod(sf, "PetscSFDeregisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return in *size the extent (in bytes) of unit; errors out for datatypes whose declared or true lower bound is nonzero, which PetscSF does not support */
PETSC_INTERN PetscErrorCode PetscSFGetDatatypeSize_Internal(MPI_Comm comm, MPI_Datatype unit, MPI_Aint *size)
{
  MPI_Aint lb, lb_true, bytes, bytes_true;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes));
  PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true));
  PetscCheck(lb == 0 && lb_true == 0, comm, PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
  *size = bytes;
  PetscFunctionReturn(PETSC_SUCCESS);
}