#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/hashseti.h>
#include <petsc/private/viewerimpl.h>
#include <petsc/private/hashmapi.h>

#if defined(PETSC_HAVE_CUDA)
#include <petscdevice_cuda.h>
#endif

#if defined(PETSC_HAVE_HIP)
#include <hip/hip_runtime.h>
#endif

#if defined(PETSC_CLANG_STATIC_ANALYZER)
extern void PetscSFCheckGraphSet(PetscSF, int);
#else
#if defined(PETSC_USE_DEBUG)
#define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME)
#else
#define PetscSFCheckGraphSet(sf, arg) \
  do { \
  } while (0)
#endif
#endif

const char *const PetscSFDuplicateOptions[] = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL};

/*@
  PetscSFCreate - create a star forest communication context

  Collective

  Input Parameter:
. comm - communicator on which the star forest will operate

  Output Parameter:
. sf - new star forest context

  Options Database Key:
+ -sf_type basic - Use MPI persistent Isend/Irecv for communication (Default)
. -sf_type window - Use MPI-3 one-sided window for communication
. -sf_type neighbor - Use MPI-3 neighborhood collectives for communication
- -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor)

  Level: intermediate

  Note:
  When the communication graph is one of the predefined patterns, such as those of `MPI_Alltoall()`, `MPI_Allgatherv()`,
  or `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These specialized
  `PetscSF`s are optimized and have better performance than general `PetscSF`s.
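
  Example Usage:
  A minimal sketch of the usual call sequence on a ring of processes; `rank` and `size` are assumed to come from `MPI_Comm_rank()` and `MPI_Comm_size()` on `PETSC_COMM_WORLD`:
.vb
  PetscSF     sf;
  PetscSFNode remote;

  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  PetscCall(PetscSFSetFromOptions(sf));
  remote.rank  = (rank + 1) % size; /* the single leaf on this process references root 0 of the next process */
  remote.index = 0;
  PetscCall(PetscSFSetGraph(sf, 1, 1, NULL, PETSC_COPY_VALUES, &remote, PETSC_COPY_VALUES));
  PetscCall(PetscSFSetUp(sf));
  /* ... PetscSFBcastBegin()/PetscSFBcastEnd(), PetscSFReduceBegin()/PetscSFReduceEnd(), ... */
  PetscCall(PetscSFDestroy(&sf));
.ve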

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetType()`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
{
  PetscSF b;

  PetscFunctionBegin;
  PetscAssertPointer(sf, 2);
  PetscCall(PetscSFInitializePackage());

  PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));
  b->nroots = -1;
  b->nleaves = -1;
  b->minleaf = PETSC_INT_MAX;
  b->maxleaf = PETSC_INT_MIN;
  b->nranks = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup = MPI_GROUP_NULL;
  b->outgroup = MPI_GROUP_NULL;
  b->graphset = PETSC_FALSE;
#if defined(PETSC_HAVE_DEVICE)
  b->use_gpu_aware_mpi = use_gpu_aware_mpi;
  b->use_stream_aware_mpi = PETSC_FALSE;
  b->unknown_input_stream = PETSC_FALSE;
#if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda */
  b->backend = PETSCSF_BACKEND_KOKKOS;
#elif defined(PETSC_HAVE_CUDA)
  b->backend = PETSCSF_BACKEND_CUDA;
#elif defined(PETSC_HAVE_HIP)
  b->backend = PETSCSF_BACKEND_HIP;
#endif

#if defined(PETSC_HAVE_NVSHMEM)
  b->use_nvshmem = PETSC_FALSE;     /* Default is not to try NVSHMEM */
  b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
#endif
#endif
  b->vscat.from_n = -1;
  b->vscat.to_n = -1;
  b->vscat.unit = MPIU_SCALAR;
  *sf = b;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

  Collective

  Input Parameter:
. sf - star forest

  Level: advanced

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscTryTypeMethod(sf, Reset);
  PetscCall(PetscSFDestroy(&sf->rankssf));

  sf->nroots = -1;
  sf->nleaves = -1;
  sf->minleaf = PETSC_INT_MAX;
  sf->maxleaf = PETSC_INT_MIN;
  sf->mine = NULL;
  sf->remote = NULL;
  sf->graphset = PETSC_FALSE;
  PetscCall(PetscFree(sf->mine_alloc));
  PetscCall(PetscFree(sf->remote_alloc));
  sf->nranks = -1;
  PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
  sf->degreeknown = PETSC_FALSE;
  PetscCall(PetscFree(sf->degree));
  if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
  if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));

  if (sf->multi) sf->multi->multi = NULL;
  PetscCall(PetscSFDestroy(&sf->multi));

  PetscCall(PetscLayoutDestroy(&sf->map));

#if defined(PETSC_HAVE_DEVICE)
  for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
#endif

  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFSetType - Set the `PetscSF` communication implementation

  Collective

  Input Parameters:
+ sf - the `PetscSF` context
- type - a known method
.vb
    PETSCSFWINDOW - MPI-2/3 one-sided
    PETSCSFBASIC  - basic implementation using MPI-1 two-sided
.ve

  Options Database Key:
. -sf_type <type> - Sets the method; for example `basic` or `window`. Use -help for a list of available methods

  Level: intermediate

  Note:
  See `PetscSFType` for possible values

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
{
  PetscBool match;
  PetscErrorCode (*r)(PetscSF);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscAssertPointer(type, 2);

  PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
  if (match) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
  PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
  /* Destroy the previous PetscSF implementation context */
  PetscTryTypeMethod(sf, Destroy);
  PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
  PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
  PetscCall((*r)(sf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFGetType - Get the `PetscSF` communication implementation

  Not Collective

  Input Parameter:
. sf - the `PetscSF` context

  Output Parameter:
. type - the `PetscSF` type name

  Level: intermediate

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscAssertPointer(type, 2);
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFDestroy - destroy a star forest

  Collective

  Input Parameter:
. sf - address of star forest

  Level: intermediate

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(PETSC_SUCCESS);
  PetscValidHeaderSpecific(*sf, PETSCSF_CLASSID, 1);
  if (--((PetscObject)*sf)->refct > 0) {
    *sf = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscSFReset(*sf));
  PetscTryTypeMethod(*sf, Destroy);
  PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
  if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
#if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
  if ((*sf)->use_stream_aware_mpi) {
    PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream));
    PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm));
  }
#endif
  PetscCall(PetscHeaderDestroy(sf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
{
  PetscInt i, nleaves;
  PetscMPIInt size;
  const PetscInt *ilocal;
  const PetscSFNode *iremote;

  PetscFunctionBegin;
  if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
  for (i = 0; i < nleaves; i++) {
    const PetscInt rank = iremote[i].rank;
    const PetscInt remote = iremote[i].index;
    const PetscInt leaf = ilocal ? ilocal[i] : i;
    PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
    PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
    PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication

  Collective

  Input Parameter:
. sf - star forest communication object

  Level: beginner

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
@*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
  PetscCall(PetscSFCheckGraphValid_Private(sf));
  if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
  PetscTryTypeMethod(sf, SetUp);
#if defined(PETSC_HAVE_CUDA)
  if (sf->backend == PETSCSF_BACKEND_CUDA) {
    sf->ops->Malloc = PetscSFMalloc_CUDA;
    sf->ops->Free = PetscSFFree_CUDA;
  }
#endif
#if defined(PETSC_HAVE_HIP)
  if (sf->backend == PETSCSF_BACKEND_HIP) {
    sf->ops->Malloc = PetscSFMalloc_HIP;
    sf->ops->Free = PetscSFFree_HIP;
  }
#endif

#if defined(PETSC_HAVE_KOKKOS)
  if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
    sf->ops->Malloc = PetscSFMalloc_Kokkos;
    sf->ops->Free = PetscSFFree_Kokkos;
  }
#endif
  PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFSetFromOptions - set `PetscSF` options using the options database

  Logically Collective

  Input Parameter:
. sf - star forest

  Options Database Keys:
+ -sf_type - implementation type, see `PetscSFSetType()`
. -sf_rank_order - sort composite points for gathers and scatters in MPI rank order, gathers are non-deterministic otherwise
. -sf_use_default_stream - Assume callers of `PetscSF` computed the input root/leafdata with the default CUDA stream. `PetscSF` will also
                           use the default stream to process data. Therefore, no stream synchronization is needed between `PetscSF` and its caller (default: true).
                           If true, this option only works with `-use_gpu_aware_mpi 1`.
. -sf_use_stream_aware_mpi - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false).
                             If true, this option only works with `-use_gpu_aware_mpi 1`.
- -sf_backend <cuda,hip,kokkos> - Select the device backend `PetscSF` uses. Currently `PetscSF` has three backends: cuda, hip and kokkos.
                                  On CUDA (HIP) devices, one can choose cuda (hip) or kokkos, with the default being kokkos. On other devices,
                                  kokkos is the only available backend (see the example below).
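
  Example Usage:
  A sketch of selecting the implementation and device backend from the command line, assuming a CUDA-enabled PETSc build (`./app` is a placeholder for the application binary):
.vb
  ./app -sf_type basic -sf_backend cuda
.ve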

  Level: intermediate

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType deft;
  char type[256];
  PetscBool flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscObjectOptionsBegin((PetscObject)sf);
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
  PetscCall(PetscSFSetType(sf, flg ? type : deft));
  PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
  PetscCall(PetscOptionsBool("-sf_monitor", "monitor the MPI communication in sf", NULL, sf->monitor, &sf->monitor, NULL));
#if defined(PETSC_HAVE_DEVICE)
  {
    char backendstr[32] = {0};
    PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
    /* Change the defaults set in PetscSFCreate() with command line options */
    PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
    PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
    PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
    PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
    PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
    PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
    if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
    else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
    else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
    else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
#elif defined(PETSC_HAVE_KOKKOS)
    PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
#endif

#if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
    if (sf->use_stream_aware_mpi) {
      MPI_Info info;

      PetscCallMPI(MPI_Info_create(&info));
      PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t"));
      PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream)));
      PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream));
      PetscCallMPI(MPI_Info_free(&info));
      PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm));
    }
#endif
  }
#endif
  PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFSetRankOrder - sort multi-points for gathers and scatters by MPI rank order

  Logically Collective

  Input Parameters:
+ sf - star forest
- flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (false has a lower setup cost, but is non-deterministic)

  Level: advanced

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveBool(sf, flg, 2);
  PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFSetGraph - Set a parallel star forest

  Collective

  Input Parameters:
+ sf - star forest
. nroots - number of root vertices on the current MPI process (these are possible targets for other processes to attach leaves)
. nleaves - number of leaf vertices on the current MPI process, each of these references a root on any process
. ilocal - locations of leaves in leafdata buffers (locations must be >= 0, enforced during setup in debug mode), pass `NULL` for contiguous storage (same as passing (0, 1, 2, ..., nleaves-1))
. localmode - copy mode for `ilocal`
. iremote - remote locations of root vertices for each leaf on the current process, length is `nleaves` (locations must be >= 0, enforced during setup in debug mode)
- remotemode - copy mode for `iremote`

  Level: intermediate

  Notes:
  Leaf indices in `ilocal` must be unique, otherwise an error occurs.

  Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics.
  In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
  PETSc might modify the respective array;
  if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`.
  If `PETSC_COPY_VALUES` is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).

  `PetscSFSetGraphWithPattern()` is an alternative approach to provide certain communication patterns that have extra optimizations.

  Fortran Notes:
  In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`.

  Developer Notes:
  We sort leaves to check for duplicates and contiguity and to find minleaf/maxleaf.
  This also allows comparing the leaf sets of two `PetscSF`s easily.
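
  Example Usage:
  A minimal sketch for two MPI processes in which each process owns two roots and two contiguous leaves that reference the two roots of the other process; an existing `PetscSF` `sf` is assumed and `rank` is assumed to come from `MPI_Comm_rank()`:
.vb
  PetscSFNode iremote[2];

  for (PetscInt i = 0; i < 2; i++) {
    iremote[i].rank  = 1 - rank; /* the other process */
    iremote[i].index = i;        /* its root i */
  }
  PetscCall(PetscSFSetGraph(sf, 2, 2, NULL, PETSC_COPY_VALUES, iremote, PETSC_COPY_VALUES));
.ve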

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`, `PetscSFSetGraphWithPattern()`
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt ilocal[], PetscCopyMode localmode, PetscSFNode iremote[], PetscCopyMode remotemode)
{
  PetscBool unique, contiguous;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4);
  if (nleaves > 0) PetscAssertPointer(iremote, 6);
  PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
  PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
  /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
     below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
  PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
  PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    PetscCall(PetscSFReset(sf));
  }

  PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));
  if (PetscDefined(USE_DEBUG)) {
    PetscMPIInt size;

    PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
    for (PetscInt i = 0; i < nleaves; i++) PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values");
  }

  sf->nroots = nroots;
  sf->nleaves = nleaves;

  if (localmode == PETSC_COPY_VALUES && ilocal) {
    PetscInt *tlocal = NULL;

    PetscCall(PetscMalloc1(nleaves, &tlocal));
    PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
    ilocal = tlocal;
  }
  if (remotemode == PETSC_COPY_VALUES) {
    PetscSFNode *tremote = NULL;

    PetscCall(PetscMalloc1(nleaves, &tremote));
    PetscCall(PetscArraycpy(tremote, iremote, nleaves));
    iremote = tremote;
  }

  if (nleaves && ilocal) {
    PetscSFNode work;

    PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
    PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
    unique = PetscNot(unique);
    PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
    sf->minleaf = ilocal[0];
    sf->maxleaf = ilocal[nleaves - 1];
    contiguous = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
  } else {
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
    unique = PETSC_TRUE;
    contiguous = PETSC_TRUE;
  }

  if (contiguous) {
    if (localmode == PETSC_USE_POINTER) {
      ilocal = NULL;
    } else {
      PetscCall(PetscFree(ilocal));
    }
  }
  sf->mine = ilocal;
  if (localmode == PETSC_USE_POINTER) {
    sf->mine_alloc = NULL;
  } else {
    sf->mine_alloc = ilocal;
  }
  if (PetscDefined(USE_DEBUG)) {
    PetscMPIInt size;

    PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
    for (PetscInt i = 0; i < nleaves; i++) PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values");
  }
  sf->remote = iremote;
  if (remotemode == PETSC_USE_POINTER) {
    sf->remote_alloc = NULL;
  } else {
    sf->remote_alloc = iremote;
  }
  PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern

  Collective

  Input Parameters:
+ sf - The `PetscSF`
. map - Layout of roots over all processes (not used when pattern is `PETSCSF_PATTERN_ALLTOALL`)
- pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`

  Level: intermediate

  Notes:
  It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `root` and its `PetscLayout` is `map`.
  `n` and `N` are the local and global sizes of `root` respectively.

  With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph such that if one does `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on it, it will copy `root` to
  sequential vectors `leaves` on all MPI processes.

  With `PETSCSF_PATTERN_GATHER`, the routine creates a graph such that if one does `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on it, it will copy `root` to a
  sequential vector `leaves` on MPI rank 0.

  With `PETSCSF_PATTERN_ALLTOALL`, `map` is not used. Suppose NP is the size of `sf`'s communicator. The routine
  creates a graph where every MPI process has NP leaves and NP roots. On MPI rank i, its leaf j is connected to root i
  of rank j. Here 0 <= i,j < NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that this does
  not mean one cannot send multiple items. One needs to create a new MPI datatype for the multiple data
  items with `MPI_Type_contiguous()` and use that as the <unit> argument in the `PetscSF` routines. In this case, roots and leaves are symmetric.
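
  Example Usage:
  A minimal sketch of the allgather pattern; broadcasting on the resulting `PetscSF` copies each process's root data into every process's leaf buffer. Here `sf` is a `PetscSF`, `map` is assumed to be the `PetscLayout` of the roots, and `rootdata`/`leafdata` are assumed to be arrays of local size `n` and global size `N` respectively:
.vb
  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  PetscCall(PetscSFSetGraphWithPattern(sf, map, PETSCSF_PATTERN_ALLGATHER));
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
.ve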

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
{
  MPI_Comm comm;
  PetscInt n, N, res[2];
  PetscMPIInt rank, size;
  PetscSFType type;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2);
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    PetscInt sizei = size;

    type = PETSCSFALLTOALL;
    PetscCall(PetscLayoutCreate(comm, &sf->map));
    PetscCall(PetscLayoutSetLocalSize(sf->map, size));
    PetscCall(PetscLayoutSetSize(sf->map, PetscSqr(sizei)));
    PetscCall(PetscLayoutSetUp(sf->map));
  } else {
    PetscCall(PetscLayoutGetLocalSize(map, &n));
    PetscCall(PetscLayoutGetSize(map, &N));
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    PetscCall(PetscLayoutReference(map, &sf->map));
  }
  PetscCall(PetscSFSetType(sf, type));

  sf->pattern = pattern;
  sf->mine = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
  */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots = n;
    sf->nranks = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    sf->nleaves = rank ? 0 : N;
    sf->nroots = n;
    sf->nranks = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves = size;
    sf->nroots = size;
    sf->nranks = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFCreateInverseSF - given a `PetscSF` in which all roots have degree 1 (exactly one leaf), creates the inverse map

  Collective

  Input Parameter:
. sf - star forest to invert

  Output Parameter:
. isf - inverse of `sf`

  Level: advanced

  Notes:
  All roots must have degree 1.

  The local space may be a permutation, but cannot be sparse.

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
{
  PetscMPIInt rank;
  PetscInt i, nroots, nleaves, maxlocal, count, *newilocal;
  const PetscInt *ilocal;
  PetscSFNode *roots, *leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscAssertPointer(isf, 2);

  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
  maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
  PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
  for (i = 0; i < maxlocal; i++) {
    leaves[i].rank = rank;
    leaves[i].index = i;
  }
  for (i = 0; i < nroots; i++) {
    roots[i].rank = -1;
    roots[i].index = -1;
  }
  PetscCall(PetscSFReduceBegin(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));

  /* Check whether our leaves are sparse */
  for (i = 0, count = 0; i < nroots; i++)
    if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL;
  else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves) */
    PetscCall(PetscMalloc1(count, &newilocal));
    for (i = 0, count = 0; i < nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count] = i;
        roots[count].rank = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
  PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
  PetscCall(PetscFree2(roots, leaves));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph

  Collective

  Input Parameters:
+ sf - communication object to duplicate
- opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)

  Output Parameter:
. newsf - new communication object

  Level: beginner

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
{
  PetscSFType type;
  MPI_Datatype dtype = MPIU_SCALAR;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveEnum(sf, opt, 2);
  PetscAssertPointer(newsf, 3);
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
  PetscCall(PetscSFGetType(sf, &type));
  if (type) PetscCall(PetscSFSetType(*newsf, type));
  (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf, 1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt nroots, nleaves;
      const PetscInt *ilocal;
      const PetscSFNode *iremote;
      PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
      PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
    } else {
      PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
    }
  }
  /* Since oldtype is committed, so is newtype, according to MPI */
  if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
  (*newsf)->vscat.bs = sf->vscat.bs;
  (*newsf)->vscat.unit = dtype;
  (*newsf)->vscat.to_n = sf->vscat.to_n;
  (*newsf)->vscat.from_n = sf->vscat.from_n;
  /* Do not copy lsf. Build it on demand since it is rarely used */

#if defined(PETSC_HAVE_DEVICE)
  (*newsf)->backend = sf->backend;
  (*newsf)->unknown_input_stream = sf->unknown_input_stream;
  (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGetGraph - Get the graph specifying a parallel star forest

  Not Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ nroots - number of root vertices on the current process (these are possible targets for other MPI processes to attach leaves)
. nleaves - number of leaf vertices on the current process, each of these references a root on any MPI process
. ilocal - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage)
- iremote - remote locations of root vertices for each leaf on the current process

  Level: intermediate

  Notes:
  We are not currently requiring that the graph is set, thus returning `nroots` = -1 if it has not been set yet

  The returned `ilocal` and `iremote` might contain values in a different order than the input ones in `PetscSFSetGraph()`

  Fortran Note:
  Use `PetscSFRestoreGraph()` when access to the arrays is no longer needed

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt *ilocal[], const PetscSFNode *iremote[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (sf->ops->GetGraph) {
    PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote));
  } else {
    if (nroots) *nroots = sf->nroots;
    if (nleaves) *nleaves = sf->nleaves;
    if (ilocal) *ilocal = sf->mine;
    if (iremote) *iremote = sf->remote;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFGetLeafRange - Get the active leaf ranges

  Not Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ minleaf - minimum active leaf on this MPI process. Returns 0 if there are no leaves.
- maxleaf - maximum active leaf on this MPI process. Returns -1 if there are no leaves.

  Level: developer

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database

  Collective

  Input Parameters:
+ A - the star forest
. obj - Optional object that provides the prefix for the option names
- name - command line option

  Level: intermediate

  Note:
  See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat`

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1);
  PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFView - view a star forest

  Collective

  Input Parameters:
+ sf - star forest
- viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`

  Level: beginner

.seealso: [](sec_petscsf), `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
{
  PetscBool isascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  PetscCheckSameComm(sf, 1, viewer, 2);
  if (sf->graphset) PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt j;

    PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
    PetscCall(PetscViewerASCIIPushTab(viewer));
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
        PetscCall(PetscViewerASCIIPopTab(viewer));
        PetscFunctionReturn(PETSC_SUCCESS);
      }
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%d\n", rank, sf->nroots, sf->nleaves, sf->nranks));
      for (PetscInt i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerGetFormat(viewer, &format));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks, *perm;

        PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
        PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
        for (PetscMPIInt i = 0; i < sf->nranks; i++) perm[i] = i;
        PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
        for (PetscMPIInt ii = 0; ii < sf->nranks; ii++) {
          PetscMPIInt i = perm[ii];

          PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
          for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
        }
        PetscCall(PetscFree2(tmpranks, perm));
      }
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
    }
    PetscCall(PetscViewerASCIIPopTab(viewer));
  }
  PetscTryTypeMethod(sf, View, viewer);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGetRootRanks - Get the root MPI ranks and number of vertices referenced by leaves on this process

  Not Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ nranks - number of MPI processes referenced by local part
. ranks - [`nranks`] array of MPI ranks
. roffset - [`nranks`+1] offset in `rmine` and `rremote` for each MPI process
. rmine - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote MPI process, or `NULL`
- rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote MPI process, or `NULL`

  Level: developer
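
  Example Usage:
  A minimal sketch of iterating over the root ranks referenced by the leaves on this process (`sf` is assumed to have been set up already):
.vb
  PetscMPIInt        nranks;
  const PetscMPIInt *ranks;
  const PetscInt    *roffset, *rmine, *rremote;

  PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, &roffset, &rmine, &rremote));
  for (PetscMPIInt r = 0; r < nranks; r++) {
    for (PetscInt j = roffset[r]; j < roffset[r + 1]; j++) {
      /* leaf rmine[j] on this process references root rremote[j] on process ranks[r] */
    }
  }
.ve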

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetLeafRanks()`
@*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscMPIInt *nranks, const PetscMPIInt *ranks[], const PetscInt *roffset[], const PetscInt *rmine[], const PetscInt *rremote[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetRootRanks) {
    PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote);
  } else {
    /* The generic implementation */
    if (nranks) *nranks = sf->nranks;
    if (ranks) *ranks = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine) *rmine = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGetLeafRanks - Get leaf MPI ranks referencing roots on this process

  Not Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ niranks - number of leaf MPI processes referencing roots on this process
. iranks - [`niranks`] array of MPI ranks
. ioffset - [`niranks`+1] offset in `irootloc` for each MPI process
- irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf MPI process

  Level: developer

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetRootRanks()`
@*/
PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscMPIInt *niranks, const PetscMPIInt *iranks[], const PetscInt *ioffset[], const PetscInt *irootloc[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetLeafRanks) {
    PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc);
  } else {
    PetscSFType type;
    PetscCall(PetscSFGetType(sf, &type));
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
{
  PetscInt i;
  for (i = 0; i < n; i++) {
    if (needle == list[i]) return PETSC_TRUE;
  }
  return PETSC_FALSE;
}

/*@C
  PetscSFSetUpRanks - Set up data structures associated with MPI ranks; this is for internal use by `PetscSF` implementations.

  Collective

  Input Parameters:
+ sf - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
- dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)

  Level: developer

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetRootRanks()`
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
{
  PetscHMapI table;
  PetscHashIter pos;
  PetscMPIInt size, groupsize, *groupranks, *ranks;
  PetscInt *rcount;
  PetscInt irank, sfnrank, ranksi;
  PetscMPIInt i, orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
  PetscCall(PetscHMapICreateWithSize(10, &table));
  for (i = 0; i < sf->nleaves; i++) {
    /* Log 1-based rank */
    PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
  }
  PetscCall(PetscHMapIGetSize(table, &sfnrank));
  PetscCall(PetscMPIIntCast(sfnrank, &sf->nranks));
  PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
  PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
  PetscHashIterBegin(table, pos);
  for (i = 0; i < sf->nranks; i++) {
    PetscHashIterGetKey(table, pos, ranksi);
    PetscCall(PetscMPIIntCast(ranksi, &ranks[i]));
    PetscHashIterGetVal(table, pos, rcount[i]);
    PetscHashIterNext(table, pos);
    ranks[i]--; /* Convert back to 0-based */
  }
  PetscCall(PetscHMapIDestroy(&table));

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;

    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
    PetscCall(PetscMalloc1(groupsize, &dgroupranks));
    PetscCall(PetscMalloc1(groupsize, &groupranks));
    for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
    if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree(dgroupranks));
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
    for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i], groupsize, groupranks)) break;
    }
    for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscMPIInt tmprank;
      PetscInt tmpcount;

      tmprank = ranks[i];
      tmpcount = rcount[i];
      ranks[i] = ranks[sf->ndranks];
      rcount[i] = rcount[sf->ndranks];
      ranks[sf->ndranks] = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  PetscCall(PetscFree(groupranks));
  PetscCall(PetscSortMPIIntWithIntArray(sf->ndranks, ranks, rcount));
  if (rcount) PetscCall(PetscSortMPIIntWithIntArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
  sf->roffset[0] = 0;
  for (i = 0; i < sf->nranks; i++) {
    PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
    sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
    rcount[i] = 0;
  }
  for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      PetscCall(PetscMPIIntCast(sf->remote[i].rank, &orank));
      PetscCall(PetscFindMPIInt(orank, sf->ndranks, sf->ranks, &irank));
      if (irank < 0) {
        PetscCall(PetscFindMPIInt(orank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
        if (irank >= 0) irank += sf->ndranks;
      }
    }
    PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %d in array", orank);
    sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  PetscCall(PetscFree2(rcount, ranks));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGetGroups - gets incoming and outgoing process groups

  Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ incoming - group of origin processes for incoming edges (leaves that reference my roots)
- outgoing - group of destination processes for outgoing edges (roots that I reference)

  Level: developer

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
{
  MPI_Group group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt i;
    const PetscInt *indegree;
    PetscMPIInt rank, *outranks, *inranks, indegree0;
    PetscSFNode *remote;
    PetscSF bgcount;

    /* Compute the number of incoming ranks */
    PetscCall(PetscMalloc1(sf->nranks, &remote));
    for (i = 0; i < sf->nranks; i++) {
      remote[i].rank = sf->ranks[i];
      remote[i].index = 0;
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
    PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
    PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
    PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
    /* Enumerate the incoming ranks */
    PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
    for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
    PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
    PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCall(PetscMPIIntCast(indegree[0], &indegree0));
    PetscCallMPI(MPI_Group_incl(group, indegree0, inranks, &sf->ingroup));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree2(inranks, outranks));
    PetscCall(PetscSFDestroy(&bgcount));
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
    PetscCallMPI(MPI_Group_free(&group));
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks

  Collective

  Input Parameter:
. sf - star forest

  Output Parameter:
. rsf - the star forest with a single root per MPI process to perform communications

  Level: developer

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()`
@*/
PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscAssertPointer(rsf, 2);
  if (!sf->rankssf) {
    PetscSFNode *rremotes;
    const PetscMPIInt *ranks;
    PetscMPIInt nranks;

    PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
    PetscCall(PetscMalloc1(nranks, &rremotes));
    for (PetscInt i = 0; i < nranks; i++) {
      rremotes[i].rank = ranks[i];
      rremotes[i].index = 0;
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf));
    PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER));
  }
  *rsf = sf->rankssf;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters

  Collective

  Input Parameter:
. sf - star forest that may contain roots with 0 or with more than 1 vertex

  Output Parameter:
. multi - star forest with split roots, such that each root has degree exactly 1 (has one leaf)

  Level: developer

  Note:
  In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
  directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
  edge, it is a candidate for future optimization that might involve its removal.

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscAssertPointer(multi, 2);
  if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
    *multi = sf->multi;
    sf->multi->multi = sf->multi;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt i, *inoffset, *outones, *outoffset, maxlocal;
    PetscSFNode *remote;
    maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
    PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
    PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
    PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
    inoffset[0] = 0;
    for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
    for (i = 0; i < maxlocal; i++) outones[i] = 1;
    PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
    PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
    for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) {                               /* Check that the expected number of increments occurred */
      for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
    }
    PetscCall(PetscMalloc1(sf->nleaves, &remote));
    for (i = 0; i < sf->nleaves; i++) {
      remote[i].rank = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
    sf->multi->multi = sf->multi;
    PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
    if (sf->rankorder) { /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
      PetscSFNode *newremote;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
      for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
      PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
      for (i = 0; i < maxlocal; i++) outranks[i] = rank;
      PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
      PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i = 0; i < sf->nroots; i++) {
        PetscInt j;
        for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
        PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset));
        for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
      PetscCall(PetscMalloc1(sf->nleaves, &newremote));
      for (i = 0; i < sf->nleaves; i++) {
        newremote[i].rank = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
      PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
    }
    PetscCall(PetscFree3(inoffset, outones, outoffset));
  }
  *multi = sf->multi;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices

  Collective

  Input Parameters:
+ sf - original star forest
. nselected - number of selected roots on this MPI process
- selected - indices of the selected roots on this MPI process

  Output Parameter:
. esf - new star forest

  Level: advanced

  Note:
  To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
  be done by calling `PetscSFGetGraph()`.

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt selected[], PetscSF *esf)
{
  PetscInt i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
  const PetscInt *ilocal;
  signed char *rootdata, *leafdata, *leafmem;
  const PetscSFNode *iremote;
  PetscSFNode *new_iremote;
  MPI_Comm comm;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  if (nselected) PetscAssertPointer(selected, 3);
  PetscAssertPointer(esf, 4);

  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));

  if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
    PetscBool dups;
    PetscCall(PetscCheckDupsInt(nselected, selected, &dups));
    PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups");
    for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
  }

  if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
  else {
    /* A generic version of creating embedded sf */
    PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
    maxlocal = maxleaf - minleaf + 1;
    PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
    leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf);
    /* Tag selected roots and bcast to leaves */
    for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
    PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));

    /* Build esf with leaves that are still connected */
    esf_nleaves = 0;
    for (i = 0; i < nleaves; i++) {
      j = ilocal ? ilocal[i] : i;
      /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
         with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
      */
      esf_nleaves += (leafdata[j] ? 1 : 0);
1 : 0); 1383 } 1384 PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal)); 1385 PetscCall(PetscMalloc1(esf_nleaves, &new_iremote)); 1386 for (i = n = 0; i < nleaves; i++) { 1387 j = ilocal ? ilocal[i] : i; 1388 if (leafdata[j]) { 1389 new_ilocal[n] = j; 1390 new_iremote[n].rank = iremote[i].rank; 1391 new_iremote[n].index = iremote[i].index; 1392 ++n; 1393 } 1394 } 1395 PetscCall(PetscSFCreate(comm, esf)); 1396 PetscCall(PetscSFSetFromOptions(*esf)); 1397 PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1398 PetscCall(PetscFree2(rootdata, leafmem)); 1399 } 1400 PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0)); 1401 PetscFunctionReturn(PETSC_SUCCESS); 1402 } 1403 1404 /*@C 1405 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices 1406 1407 Collective 1408 1409 Input Parameters: 1410 + sf - original star forest 1411 . nselected - number of selected leaves on this MPI process 1412 - selected - indices of the selected leaves on this MPI process 1413 1414 Output Parameter: 1415 . newsf - new star forest 1416 1417 Level: advanced 1418 1419 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1420 @*/ 1421 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt selected[], PetscSF *newsf) 1422 { 1423 const PetscSFNode *iremote; 1424 PetscSFNode *new_iremote; 1425 const PetscInt *ilocal; 1426 PetscInt i, nroots, *leaves, *new_ilocal; 1427 MPI_Comm comm; 1428 1429 PetscFunctionBegin; 1430 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1431 PetscSFCheckGraphSet(sf, 1); 1432 if (nselected) PetscAssertPointer(selected, 3); 1433 PetscAssertPointer(newsf, 4); 1434 1435 /* Uniq selected[] and put results in leaves[] */ 1436 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1437 PetscCall(PetscMalloc1(nselected, &leaves)); 1438 PetscCall(PetscArraycpy(leaves, selected, nselected)); 1439 PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves)); 1440 PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves); 1441 1442 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1443 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf); 1444 else { 1445 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote)); 1446 PetscCall(PetscMalloc1(nselected, &new_ilocal)); 1447 PetscCall(PetscMalloc1(nselected, &new_iremote)); 1448 for (i = 0; i < nselected; ++i) { 1449 const PetscInt l = leaves[i]; 1450 new_ilocal[i] = ilocal ? 
ilocal[l] : l; 1451 new_iremote[i].rank = iremote[l].rank; 1452 new_iremote[i].index = iremote[l].index; 1453 } 1454 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf)); 1455 PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1456 } 1457 PetscCall(PetscFree(leaves)); 1458 PetscFunctionReturn(PETSC_SUCCESS); 1459 } 1460 1461 /*@C 1462 PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()` 1463 1464 Collective 1465 1466 Input Parameters: 1467 + sf - star forest on which to communicate 1468 . unit - data type associated with each node 1469 . rootdata - buffer to broadcast 1470 - op - operation to use for reduction 1471 1472 Output Parameter: 1473 . leafdata - buffer to be reduced with values from each leaf's respective root 1474 1475 Level: intermediate 1476 1477 Note: 1478 When PETSc is configured with device support, it will use `PetscGetMemType()` to determine whether the given data pointers 1479 are host pointers or device pointers, which may incur a noticeable cost. If you already knew the memory type, you should 1480 use `PetscSFBcastWithMemTypeBegin()` instead. 1481 1482 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()` 1483 @*/ 1484 PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1485 { 1486 PetscMemType rootmtype, leafmtype; 1487 1488 PetscFunctionBegin; 1489 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1490 PetscCall(PetscSFSetUp(sf)); 1491 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1492 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1493 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1494 PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 1495 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1496 PetscFunctionReturn(PETSC_SUCCESS); 1497 } 1498 1499 /*@C 1500 PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call 1501 to `PetscSFBcastEnd()` 1502 1503 Collective 1504 1505 Input Parameters: 1506 + sf - star forest on which to communicate 1507 . unit - data type associated with each node 1508 . rootmtype - memory type of `rootdata` 1509 . rootdata - buffer to broadcast 1510 . leafmtype - memory type of `leafdata` 1511 - op - operation to use for reduction 1512 1513 Output Parameter: 1514 . 
leafdata - buffer to be reduced with values from each leaf's respective root 1515 1516 Level: intermediate 1517 1518 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()` 1519 @*/ 1520 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) 1521 { 1522 PetscFunctionBegin; 1523 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1524 PetscCall(PetscSFSetUp(sf)); 1525 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1526 PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 1527 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1528 PetscFunctionReturn(PETSC_SUCCESS); 1529 } 1530 1531 /*@C 1532 PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()` 1533 1534 Collective 1535 1536 Input Parameters: 1537 + sf - star forest 1538 . unit - data type 1539 . rootdata - buffer to broadcast 1540 - op - operation to use for reduction 1541 1542 Output Parameter: 1543 . leafdata - buffer to be reduced with values from each leaf's respective root 1544 1545 Level: intermediate 1546 1547 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()` 1548 @*/ 1549 PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1550 { 1551 PetscFunctionBegin; 1552 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1553 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1554 PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op); 1555 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1556 PetscFunctionReturn(PETSC_SUCCESS); 1557 } 1558 1559 /*@C 1560 PetscSFReduceBegin - begin reduction (communication) of `leafdata` into `rootdata`, to be completed with call to `PetscSFReduceEnd()` 1561 1562 Collective 1563 1564 Input Parameters: 1565 + sf - star forest 1566 . unit - data type 1567 . leafdata - values to reduce (communicate) 1568 - op - reduction operation 1569 1570 Output Parameter: 1571 . rootdata - result of reduction of values (`leafdata`) from all leaves to each root 1572 1573 Level: intermediate 1574 1575 Note: 1576 When PETSc is configured with device support, it will use `PetscGetMemType()` to determine whether the given data pointers 1577 are host pointers or device pointers, which may incur a noticeable cost. If you already knew the memory type, you should 1578 use `PetscSFReduceWithMemTypeBegin()` instead. 
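Example:
A minimal sketch (illustrative only; it assumes `leafdata` and `rootdata` are user-allocated `PetscScalar` arrays sized to the leaf and root spaces of `sf`) that adds each leaf's value into its root\:
.vb
   // the same (leafdata, rootdata, op) arguments must be passed to the matching End call
   PetscCall(PetscSFReduceBegin(sf, MPIU_SCALAR, leafdata, rootdata, MPI_SUM));
   // independent local work may overlap the communication here
   PetscCall(PetscSFReduceEnd(sf, MPIU_SCALAR, leafdata, rootdata, MPI_SUM));
.ve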
1579 1580 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()`, `MPI_Datatype` 1581 @*/ 1582 PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1583 { 1584 PetscMemType rootmtype, leafmtype; 1585 1586 PetscFunctionBegin; 1587 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1588 PetscCall(PetscSFSetUp(sf)); 1589 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1590 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1591 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1592 PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1593 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1594 PetscFunctionReturn(PETSC_SUCCESS); 1595 } 1596 1597 /*@C 1598 PetscSFReduceWithMemTypeBegin - begin reduction of `leafdata` into `rootdata` with explicit memory types, to be completed with call to `PetscSFReduceEnd()` 1599 1600 Collective 1601 1602 Input Parameters: 1603 + sf - star forest 1604 . unit - data type 1605 . leafmtype - memory type of `leafdata` 1606 . leafdata - values to reduce 1607 . rootmtype - memory type of `rootdata` 1608 - op - reduction operation 1609 1610 Output Parameter: 1611 . rootdata - result of reduction of values from all leaves of each root 1612 1613 Level: intermediate 1614 1615 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()` 1616 @*/ 1617 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) 1618 { 1619 PetscFunctionBegin; 1620 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1621 PetscCall(PetscSFSetUp(sf)); 1622 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1623 PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1624 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1625 PetscFunctionReturn(PETSC_SUCCESS); 1626 } 1627 1628 /*@C 1629 PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()` 1630 1631 Collective 1632 1633 Input Parameters: 1634 + sf - star forest 1635 . unit - data type 1636 . leafdata - values to reduce 1637 - op - reduction operation 1638 1639 Output Parameter: 1640 . 
rootdata - result of reduction of values from all leaves of each root 1641 1642 Level: intermediate 1643 1644 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()` 1645 @*/ 1646 PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1647 { 1648 PetscFunctionBegin; 1649 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1650 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1651 PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op); 1652 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1653 PetscFunctionReturn(PETSC_SUCCESS); 1654 } 1655 1656 /*@C 1657 PetscSFFetchAndOpBegin - begin operation that fetches values from `rootdata` and updates it atomically by applying operation using `leafdata`, 1658 to be completed with `PetscSFFetchAndOpEnd()` 1659 1660 Collective 1661 1662 Input Parameters: 1663 + sf - star forest 1664 . unit - data type 1665 . leafdata - leaf values to use in reduction 1666 - op - operation to use for reduction 1667 1668 Output Parameters: 1669 + rootdata - root values to be updated, input state is seen by first process to perform an update 1670 - leafupdate - state at each leaf's respective root immediately prior to atomic update 1671 1672 Level: advanced 1673 1674 Note: 1675 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1676 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1677 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1678 integers. 1679 1680 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()` 1681 @*/ 1682 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1683 { 1684 PetscMemType rootmtype, leafmtype, leafupdatemtype; 1685 1686 PetscFunctionBegin; 1687 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1688 PetscCall(PetscSFSetUp(sf)); 1689 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1690 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1691 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1692 PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype)); 1693 PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1694 PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 1695 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1696 PetscFunctionReturn(PETSC_SUCCESS); 1697 } 1698 1699 /*@C 1700 PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by 1701 applying operation using `leafdata`, to be completed with `PetscSFFetchAndOpEnd()` 1702 1703 Collective 1704 1705 Input Parameters: 1706 + sf - star forest 1707 . unit - data type 1708 . rootmtype - memory type of `rootdata` 1709 . leafmtype - memory type of `leafdata` 1710 . leafdata - leaf values to use in reduction 1711 . 
leafupdatemtype - memory type of `leafupdate` 1712 - op - operation to use for reduction 1713 1714 Output Parameters: 1715 + rootdata - root values to be updated, input state is seen by first process to perform an update 1716 - leafupdate - state at each leaf's respective root immediately prior to atomic update 1717 1718 Level: advanced 1719 1720 Note: 1721 See `PetscSFFetchAndOpBegin()` for more details. 1722 1723 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()` 1724 @*/ 1725 PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op) 1726 { 1727 PetscFunctionBegin; 1728 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1729 PetscCall(PetscSFSetUp(sf)); 1730 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1731 PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1732 PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 1733 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1734 PetscFunctionReturn(PETSC_SUCCESS); 1735 } 1736 1737 /*@C 1738 PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()` 1739 to fetch values from roots and update atomically by applying operation using `leafdata` 1740 1741 Collective 1742 1743 Input Parameters: 1744 + sf - star forest 1745 . unit - data type 1746 . leafdata - leaf values to use in reduction 1747 - op - operation to use for reduction 1748 1749 Output Parameters: 1750 + rootdata - root values to be updated, input state is seen by first process to perform an update 1751 - leafupdate - state at each leaf's respective root immediately prior to atomic update 1752 1753 Level: advanced 1754 1755 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()` 1756 @*/ 1757 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1758 { 1759 PetscFunctionBegin; 1760 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1761 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0)); 1762 PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op); 1763 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0)); 1764 PetscFunctionReturn(PETSC_SUCCESS); 1765 } 1766 1767 /*@C 1768 PetscSFComputeDegreeBegin - begin computation of the degree of each root vertex, to be completed with `PetscSFComputeDegreeEnd()` 1769 1770 Collective 1771 1772 Input Parameter: 1773 . sf - star forest 1774 1775 Output Parameter: 1776 . degree - degree (the number of leaves) of each root vertex 1777 1778 Level: advanced 1779 1780 Note: 1781 The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it. 
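Example:
A minimal sketch (illustrative only; `sf` must already have its graph set)\:
.vb
   const PetscInt *degree;

   PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
   PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
   // degree[i] now holds the number of leaves attached to root i; the array is owned by sf, so do not free it
.ve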
1782 1783 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()` 1784 @*/ 1785 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt *degree[]) 1786 { 1787 PetscFunctionBegin; 1788 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1789 PetscSFCheckGraphSet(sf, 1); 1790 PetscAssertPointer(degree, 2); 1791 if (!sf->degreeknown) { 1792 PetscInt i, nroots = sf->nroots, maxlocal; 1793 PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1794 maxlocal = sf->maxleaf - sf->minleaf + 1; 1795 PetscCall(PetscMalloc1(nroots, &sf->degree)); 1796 PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1797 for (i = 0; i < nroots; i++) sf->degree[i] = 0; 1798 for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1; 1799 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 1800 } 1801 *degree = NULL; 1802 PetscFunctionReturn(PETSC_SUCCESS); 1803 } 1804 1805 /*@C 1806 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()` 1807 1808 Collective 1809 1810 Input Parameter: 1811 . sf - star forest 1812 1813 Output Parameter: 1814 . degree - degree of each root vertex 1815 1816 Level: developer 1817 1818 Note: 1819 The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it. 1820 1821 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()` 1822 @*/ 1823 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt *degree[]) 1824 { 1825 PetscFunctionBegin; 1826 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1827 PetscSFCheckGraphSet(sf, 1); 1828 PetscAssertPointer(degree, 2); 1829 if (!sf->degreeknown) { 1830 PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 1831 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 1832 PetscCall(PetscFree(sf->degreetmp)); 1833 sf->degreeknown = PETSC_TRUE; 1834 } 1835 *degree = sf->degree; 1836 PetscFunctionReturn(PETSC_SUCCESS); 1837 } 1838 1839 /*@C 1840 PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`). 1841 Each multi-root is assigned index of the corresponding original root. 1842 1843 Collective 1844 1845 Input Parameters: 1846 + sf - star forest 1847 - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()` and `PetscSFComputeDegreeEnd()` 1848 1849 Output Parameters: 1850 + nMultiRoots - (optional) number of multi-roots (roots of multi-`PetscSF`) 1851 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots` 1852 1853 Level: developer 1854 1855 Note: 1856 The returned array `multiRootsOrigNumbering` should be destroyed with `PetscFree()` when no longer needed. 
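Example:
A minimal sketch (illustrative only; the names `nmroots` and `morig` are placeholders) that maps each multi-root back to its original root\:
.vb
   const PetscInt *degree;
   PetscInt        nmroots, *morig;

   PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
   PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
   PetscCall(PetscSFComputeMultiRootOriginalNumbering(sf, degree, &nmroots, &morig));
   // morig[k] is the original root index of multi-root k, for 0 <= k < nmroots
   PetscCall(PetscFree(morig));
.ve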
1857 1858 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()` 1859 @*/ 1860 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1861 { 1862 PetscSF msf; 1863 PetscInt k = 0, nroots, nmroots; 1864 1865 PetscFunctionBegin; 1866 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1867 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL)); 1868 if (nroots) PetscAssertPointer(degree, 2); 1869 if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3); 1870 PetscAssertPointer(multiRootsOrigNumbering, 4); 1871 PetscCall(PetscSFGetMultiSF(sf, &msf)); 1872 PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL)); 1873 PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering)); 1874 for (PetscInt i = 0; i < nroots; i++) { 1875 if (!degree[i]) continue; 1876 for (PetscInt j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i; 1877 } 1878 PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail"); 1879 if (nMultiRoots) *nMultiRoots = nmroots; 1880 PetscFunctionReturn(PETSC_SUCCESS); 1881 } 1882 1883 /*@C 1884 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()` 1885 1886 Collective 1887 1888 Input Parameters: 1889 + sf - star forest 1890 . unit - data type 1891 - leafdata - leaf data to gather to roots 1892 1893 Output Parameter: 1894 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree (which is the number of leaves that root has) 1895 1896 Level: intermediate 1897 1898 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()` 1899 @*/ 1900 PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) 1901 { 1902 PetscSF multi = NULL; 1903 1904 PetscFunctionBegin; 1905 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1906 PetscCall(PetscSFSetUp(sf)); 1907 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1908 PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 1909 PetscFunctionReturn(PETSC_SUCCESS); 1910 } 1911 1912 /*@C 1913 PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()` 1914 1915 Collective 1916 1917 Input Parameters: 1918 + sf - star forest 1919 . unit - data type 1920 - leafdata - leaf data to gather to roots 1921 1922 Output Parameter: 1923 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree (which is the number of leaves that root has) 1924 1925 Level: intermediate 1926 1927 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()` 1928 @*/ 1929 PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) 1930 { 1931 PetscSF multi = NULL; 1932 1933 PetscFunctionBegin; 1934 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1935 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1936 PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 1937 PetscFunctionReturn(PETSC_SUCCESS); 1938 } 1939 1940 /*@C 1941 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()` 1942 1943 Collective 1944 1945 Input Parameters: 1946 + sf - star forest 1947 . 
unit - data type 1948 - multirootdata - root buffer to send to each leaf, one unit of data is provided to each leaf thus the amount of space per root is equal to its degree (which is the number of leaves that root has) 1949 1950 Output Parameter: 1951 . leafdata - leaf data to be update with personal data from each respective root 1952 1953 Level: intermediate 1954 1955 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()` 1956 @*/ 1957 PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) 1958 { 1959 PetscSF multi = NULL; 1960 1961 PetscFunctionBegin; 1962 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1963 PetscCall(PetscSFSetUp(sf)); 1964 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1965 PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE)); 1966 PetscFunctionReturn(PETSC_SUCCESS); 1967 } 1968 1969 /*@C 1970 PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()` 1971 1972 Collective 1973 1974 Input Parameters: 1975 + sf - star forest 1976 . unit - data type 1977 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1978 1979 Output Parameter: 1980 . leafdata - leaf data to be update with personal data from each respective root 1981 1982 Level: intermediate 1983 1984 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()` 1985 @*/ 1986 PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) 1987 { 1988 PetscSF multi = NULL; 1989 1990 PetscFunctionBegin; 1991 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1992 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1993 PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE)); 1994 PetscFunctionReturn(PETSC_SUCCESS); 1995 } 1996 1997 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 1998 { 1999 PetscInt i, n, nleaves; 2000 const PetscInt *ilocal = NULL; 2001 PetscHSetI seen; 2002 2003 PetscFunctionBegin; 2004 if (PetscDefined(USE_DEBUG)) { 2005 PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL)); 2006 PetscCall(PetscHSetICreate(&seen)); 2007 for (i = 0; i < nleaves; i++) { 2008 const PetscInt leaf = ilocal ? ilocal[i] : i; 2009 PetscCall(PetscHSetIAdd(seen, leaf)); 2010 } 2011 PetscCall(PetscHSetIGetSize(seen, &n)); 2012 PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique"); 2013 PetscCall(PetscHSetIDestroy(&seen)); 2014 } 2015 PetscFunctionReturn(PETSC_SUCCESS); 2016 } 2017 2018 /*@ 2019 PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view 2020 2021 Input Parameters: 2022 + sfA - The first `PetscSF` 2023 - sfB - The second `PetscSF` 2024 2025 Output Parameter: 2026 . sfBA - The composite `PetscSF` 2027 2028 Level: developer 2029 2030 Notes: 2031 Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star 2032 forests, i.e. the same leaf is not connected with different roots. 2033 2034 `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds 2035 a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected 2036 nodes (roots or leaves) are not in `sfBA`. 
Doing a `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a 2037 `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on `sfB`, on connected nodes. 2038 2039 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()` 2040 @*/ 2041 PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA) 2042 { 2043 const PetscSFNode *remotePointsA, *remotePointsB; 2044 PetscSFNode *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB; 2045 const PetscInt *localPointsA, *localPointsB; 2046 PetscInt *localPointsBA; 2047 PetscInt i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA; 2048 PetscBool denseB; 2049 2050 PetscFunctionBegin; 2051 PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1); 2052 PetscSFCheckGraphSet(sfA, 1); 2053 PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2); 2054 PetscSFCheckGraphSet(sfB, 2); 2055 PetscCheckSameComm(sfA, 1, sfB, 2); 2056 PetscAssertPointer(sfBA, 3); 2057 PetscCall(PetscSFCheckLeavesUnique_Private(sfA)); 2058 PetscCall(PetscSFCheckLeavesUnique_Private(sfB)); 2059 2060 PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA)); 2061 PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB)); 2062 /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size 2063 numRootsB; otherwise, garbage will be broadcast. 2064 Example (comm size = 1): 2065 sfA: 0 <- (0, 0) 2066 sfB: 100 <- (0, 0) 2067 101 <- (0, 1) 2068 Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target 2069 of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would 2070 receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on 2071 remotePointsA; if not recast, point 101 would receive a garbage value. */ 2072 PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA)); 2073 for (i = 0; i < numRootsB; i++) { 2074 reorderedRemotePointsA[i].rank = -1; 2075 reorderedRemotePointsA[i].index = -1; 2076 } 2077 for (i = 0; i < numLeavesA; i++) { 2078 PetscInt localp = localPointsA ? localPointsA[i] : i; 2079 2080 if (localp >= numRootsB) continue; 2081 reorderedRemotePointsA[localp] = remotePointsA[i]; 2082 } 2083 remotePointsA = reorderedRemotePointsA; 2084 PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf)); 2085 PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB)); 2086 for (i = 0; i < maxleaf - minleaf + 1; i++) { 2087 leafdataB[i].rank = -1; 2088 leafdataB[i].index = -1; 2089 } 2090 PetscCall(PetscSFBcastBegin(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE)); 2091 PetscCall(PetscSFBcastEnd(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE)); 2092 PetscCall(PetscFree(reorderedRemotePointsA)); 2093 2094 denseB = (PetscBool)!localPointsB; 2095 for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) { 2096 if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE; 2097 else numLeavesBA++; 2098 } 2099 if (denseB) { 2100 localPointsBA = NULL; 2101 remotePointsBA = leafdataB; 2102 } else { 2103 PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA)); 2104 PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA)); 2105 for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) { 2106 const PetscInt l = localPointsB ?
localPointsB[i] : i; 2107 2108 if (leafdataB[l - minleaf].rank == -1) continue; 2109 remotePointsBA[numLeavesBA] = leafdataB[l - minleaf]; 2110 localPointsBA[numLeavesBA] = l; 2111 numLeavesBA++; 2112 } 2113 PetscCall(PetscFree(leafdataB)); 2114 } 2115 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA)); 2116 PetscCall(PetscSFSetFromOptions(*sfBA)); 2117 PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER)); 2118 PetscFunctionReturn(PETSC_SUCCESS); 2119 } 2120 2121 /*@ 2122 PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one 2123 2124 Input Parameters: 2125 + sfA - The first `PetscSF` 2126 - sfB - The second `PetscSF` 2127 2128 Output Parameter: 2129 . sfBA - The composite `PetscSF`. 2130 2131 Level: developer 2132 2133 Notes: 2134 Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star 2135 forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the 2136 second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected. 2137 2138 `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds 2139 a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected 2140 roots are not in `sfBA`. Doing a `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()` and `PetscSFBcastEnd()` 2141 on `sfA`, then 2142 a `PetscSFReduceBegin()` and `PetscSFReduceEnd()` on `sfB`, on connected roots. 2143 2144 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()` 2145 @*/ 2146 PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA) 2147 { 2148 const PetscSFNode *remotePointsA, *remotePointsB; 2149 PetscSFNode *remotePointsBA; 2150 const PetscInt *localPointsA, *localPointsB; 2151 PetscSFNode *reorderedRemotePointsA = NULL; 2152 PetscInt i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA; 2153 MPI_Op op; 2154 #if defined(PETSC_USE_64BIT_INDICES) 2155 PetscBool iswin; 2156 #endif 2157 2158 PetscFunctionBegin; 2159 PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1); 2160 PetscSFCheckGraphSet(sfA, 1); 2161 PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2); 2162 PetscSFCheckGraphSet(sfB, 2); 2163 PetscCheckSameComm(sfA, 1, sfB, 2); 2164 PetscAssertPointer(sfBA, 3); 2165 PetscCall(PetscSFCheckLeavesUnique_Private(sfA)); 2166 PetscCall(PetscSFCheckLeavesUnique_Private(sfB)); 2167 2168 PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA)); 2169 PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB)); 2170 2171 /* TODO: Check roots of sfB have degree of 1 */ 2172 /* Once we implement it, we can replace the MPI_MAXLOC 2173 with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect. 2174 We use MPI_MAXLOC only to have a deterministic output from this routine if 2175 the root condition is not meet. 
2176 */ 2177 op = MPI_MAXLOC; 2178 #if defined(PETSC_USE_64BIT_INDICES) 2179 /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */ 2180 PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin)); 2181 if (iswin) op = MPI_REPLACE; 2182 #endif 2183 2184 PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf)); 2185 PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA)); 2186 for (i = 0; i < maxleaf - minleaf + 1; i++) { 2187 reorderedRemotePointsA[i].rank = -1; 2188 reorderedRemotePointsA[i].index = -1; 2189 } 2190 if (localPointsA) { 2191 for (i = 0; i < numLeavesA; i++) { 2192 if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; 2193 reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i]; 2194 } 2195 } else { 2196 for (i = 0; i < numLeavesA; i++) { 2197 if (i > maxleaf || i < minleaf) continue; 2198 reorderedRemotePointsA[i - minleaf] = remotePointsA[i]; 2199 } 2200 } 2201 2202 PetscCall(PetscMalloc1(numRootsB, &localPointsBA)); 2203 PetscCall(PetscMalloc1(numRootsB, &remotePointsBA)); 2204 for (i = 0; i < numRootsB; i++) { 2205 remotePointsBA[i].rank = -1; 2206 remotePointsBA[i].index = -1; 2207 } 2208 2209 PetscCall(PetscSFReduceBegin(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op)); 2210 PetscCall(PetscSFReduceEnd(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op)); 2211 PetscCall(PetscFree(reorderedRemotePointsA)); 2212 for (i = 0, numLeavesBA = 0; i < numRootsB; i++) { 2213 if (remotePointsBA[i].rank == -1) continue; 2214 remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank; 2215 remotePointsBA[numLeavesBA].index = remotePointsBA[i].index; 2216 localPointsBA[numLeavesBA] = i; 2217 numLeavesBA++; 2218 } 2219 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA)); 2220 PetscCall(PetscSFSetFromOptions(*sfBA)); 2221 PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER)); 2222 PetscFunctionReturn(PETSC_SUCCESS); 2223 } 2224 2225 /* 2226 PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF` 2227 2228 Input Parameter: 2229 . sf - The global `PetscSF` 2230 2231 Output Parameter: 2232 . 
out - The local `PetscSF` 2233 2234 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()` 2235 */ 2236 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out) 2237 { 2238 MPI_Comm comm; 2239 PetscMPIInt myrank; 2240 const PetscInt *ilocal; 2241 const PetscSFNode *iremote; 2242 PetscInt i, j, nroots, nleaves, lnleaves, *lilocal; 2243 PetscSFNode *liremote; 2244 PetscSF lsf; 2245 2246 PetscFunctionBegin; 2247 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2248 if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out); 2249 else { 2250 PetscMPIInt irank; 2251 2252 /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */ 2253 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 2254 PetscCallMPI(MPI_Comm_rank(comm, &myrank)); 2255 2256 /* Find out local edges and build a local SF */ 2257 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 2258 for (i = lnleaves = 0; i < nleaves; i++) { 2259 PetscCall(PetscMPIIntCast(iremote[i].rank, &irank)); 2260 if (irank == myrank) lnleaves++; 2261 } 2262 PetscCall(PetscMalloc1(lnleaves, &lilocal)); 2263 PetscCall(PetscMalloc1(lnleaves, &liremote)); 2264 2265 for (i = j = 0; i < nleaves; i++) { 2266 PetscCall(PetscMPIIntCast(iremote[i].rank, &irank)); 2267 if (irank == myrank) { 2268 lilocal[j] = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */ 2269 liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */ 2270 liremote[j].index = iremote[i].index; 2271 j++; 2272 } 2273 } 2274 PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf)); 2275 PetscCall(PetscSFSetFromOptions(lsf)); 2276 PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER)); 2277 PetscCall(PetscSFSetUp(lsf)); 2278 *out = lsf; 2279 } 2280 PetscFunctionReturn(PETSC_SUCCESS); 2281 } 2282 2283 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */ 2284 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata) 2285 { 2286 PetscMemType rootmtype, leafmtype; 2287 2288 PetscFunctionBegin; 2289 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2290 PetscCall(PetscSFSetUp(sf)); 2291 PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 2292 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 2293 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 2294 PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata); 2295 PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 2296 PetscFunctionReturn(PETSC_SUCCESS); 2297 } 2298 2299 /*@ 2300 PetscSFConcatenate - concatenate multiple `PetscSF` into a new `PetscSF` 2301 2302 Input Parameters: 2303 + comm - the communicator 2304 . nsfs - the number of input `PetscSF` 2305 . sfs - the array of input `PetscSF` 2306 . rootMode - the root mode specifying how roots are handled 2307 - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage 2308 2309 Output Parameter: 2310 . newsf - The resulting `PetscSF` 2311 2312 Level: advanced 2313 2314 Notes: 2315 The communicator of all `PetscSF`s in `sfs` must be comm. 2316 2317 Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order. 2318 2319 The offsets in `leafOffsets` are added to the original leaf indices. 2320 2321 If all input `PetscSF`s use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well. 
2322 In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`. 2323 2324 If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s. 2325 In this case, the user is responsible for providing correct offsets so that the resulting leaves are unique (otherwise an error occurs). 2326 2327 All root modes retain the essential connectivity condition. 2328 2329 If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`. 2330 2331 Parameter `rootMode` controls how the input root spaces are combined. 2332 For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode) 2333 and is also the same in the output `PetscSF`. 2334 2335 For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined. 2336 `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally; 2337 roots of sfs[0], sfs[1], sfs[2], ... are joined on each MPI process separately, ordered by input `PetscSF` index and original local index, and renumbered contiguously. 2338 `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally; 2339 roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously; 2340 the original root MPI processes are ignored. 2341 For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, 2342 the output `PetscSF`'s root layout is such that on each MPI process the local number of roots is the sum of the input `PetscSF`s' local numbers of roots, 2343 which preserves the load balance. 2344 However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different MPI processes. 2345 2346 Example: 2347 We can use src/vec/is/sf/tests/ex18.c to compare the root modes.
By running 2348 .vb 2349 make -C $PETSC_DIR/src/vec/is/sf/tests ex18 2350 for m in {local,global,shared}; do 2351 mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view 2352 done 2353 .ve 2354 we generate two identical `PetscSF`s sf_0 and sf_1, 2355 .vb 2356 PetscSF Object: sf_0 2 MPI processes 2357 type: basic 2358 rank #leaves #roots 2359 [ 0] 4 2 2360 [ 1] 4 2 2361 leaves roots roots in global numbering 2362 ( 0, 0) <- ( 0, 0) = 0 2363 ( 0, 1) <- ( 0, 1) = 1 2364 ( 0, 2) <- ( 1, 0) = 2 2365 ( 0, 3) <- ( 1, 1) = 3 2366 ( 1, 0) <- ( 0, 0) = 0 2367 ( 1, 1) <- ( 0, 1) = 1 2368 ( 1, 2) <- ( 1, 0) = 2 2369 ( 1, 3) <- ( 1, 1) = 3 2370 .ve 2371 and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\: 2372 .vb 2373 rootMode = local: 2374 PetscSF Object: result_sf 2 MPI processes 2375 type: basic 2376 rank #leaves #roots 2377 [ 0] 8 4 2378 [ 1] 8 4 2379 leaves roots roots in global numbering 2380 ( 0, 0) <- ( 0, 0) = 0 2381 ( 0, 1) <- ( 0, 1) = 1 2382 ( 0, 2) <- ( 1, 0) = 4 2383 ( 0, 3) <- ( 1, 1) = 5 2384 ( 0, 4) <- ( 0, 2) = 2 2385 ( 0, 5) <- ( 0, 3) = 3 2386 ( 0, 6) <- ( 1, 2) = 6 2387 ( 0, 7) <- ( 1, 3) = 7 2388 ( 1, 0) <- ( 0, 0) = 0 2389 ( 1, 1) <- ( 0, 1) = 1 2390 ( 1, 2) <- ( 1, 0) = 4 2391 ( 1, 3) <- ( 1, 1) = 5 2392 ( 1, 4) <- ( 0, 2) = 2 2393 ( 1, 5) <- ( 0, 3) = 3 2394 ( 1, 6) <- ( 1, 2) = 6 2395 ( 1, 7) <- ( 1, 3) = 7 2396 2397 rootMode = global: 2398 PetscSF Object: result_sf 2 MPI processes 2399 type: basic 2400 rank #leaves #roots 2401 [ 0] 8 4 2402 [ 1] 8 4 2403 leaves roots roots in global numbering 2404 ( 0, 0) <- ( 0, 0) = 0 2405 ( 0, 1) <- ( 0, 1) = 1 2406 ( 0, 2) <- ( 0, 2) = 2 2407 ( 0, 3) <- ( 0, 3) = 3 2408 ( 0, 4) <- ( 1, 0) = 4 2409 ( 0, 5) <- ( 1, 1) = 5 2410 ( 0, 6) <- ( 1, 2) = 6 2411 ( 0, 7) <- ( 1, 3) = 7 2412 ( 1, 0) <- ( 0, 0) = 0 2413 ( 1, 1) <- ( 0, 1) = 1 2414 ( 1, 2) <- ( 0, 2) = 2 2415 ( 1, 3) <- ( 0, 3) = 3 2416 ( 1, 4) <- ( 1, 0) = 4 2417 ( 1, 5) <- ( 1, 1) = 5 2418 ( 1, 6) <- ( 1, 2) = 6 2419 ( 1, 7) <- ( 1, 3) = 7 2420 2421 rootMode = shared: 2422 PetscSF Object: result_sf 2 MPI processes 2423 type: basic 2424 rank #leaves #roots 2425 [ 0] 8 2 2426 [ 1] 8 2 2427 leaves roots roots in global numbering 2428 ( 0, 0) <- ( 0, 0) = 0 2429 ( 0, 1) <- ( 0, 1) = 1 2430 ( 0, 2) <- ( 1, 0) = 2 2431 ( 0, 3) <- ( 1, 1) = 3 2432 ( 0, 4) <- ( 0, 0) = 0 2433 ( 0, 5) <- ( 0, 1) = 1 2434 ( 0, 6) <- ( 1, 0) = 2 2435 ( 0, 7) <- ( 1, 1) = 3 2436 ( 1, 0) <- ( 0, 0) = 0 2437 ( 1, 1) <- ( 0, 1) = 1 2438 ( 1, 2) <- ( 1, 0) = 2 2439 ( 1, 3) <- ( 1, 1) = 3 2440 ( 1, 4) <- ( 0, 0) = 0 2441 ( 1, 5) <- ( 0, 1) = 1 2442 ( 1, 6) <- ( 1, 0) = 2 2443 ( 1, 7) <- ( 1, 1) = 3 2444 .ve 2445 2446 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode` 2447 @*/ 2448 PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf) 2449 { 2450 PetscInt i, s, nLeaves, nRoots; 2451 PetscInt *leafArrayOffsets; 2452 PetscInt *ilocal_new; 2453 PetscSFNode *iremote_new; 2454 PetscBool all_ilocal_null = PETSC_FALSE; 2455 PetscLayout glayout = NULL; 2456 PetscInt *gremote = NULL; 2457 PetscMPIInt rank, size; 2458 2459 PetscFunctionBegin; 2460 if (PetscDefined(USE_DEBUG)) { 2461 PetscSF dummy; /* just to have a PetscObject on comm for input validation */ 2462 2463 PetscCall(PetscSFCreate(comm, &dummy)); 2464 PetscValidLogicalCollectiveInt(dummy, nsfs, 2); 
2465 PetscAssertPointer(sfs, 3); 2466 for (i = 0; i < nsfs; i++) { 2467 PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3); 2468 PetscCheckSameComm(dummy, 1, sfs[i], 3); 2469 } 2470 PetscValidLogicalCollectiveEnum(dummy, rootMode, 4); 2471 if (leafOffsets) PetscAssertPointer(leafOffsets, 5); 2472 PetscAssertPointer(newsf, 6); 2473 PetscCall(PetscSFDestroy(&dummy)); 2474 } 2475 if (!nsfs) { 2476 PetscCall(PetscSFCreate(comm, newsf)); 2477 PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 2478 PetscFunctionReturn(PETSC_SUCCESS); 2479 } 2480 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2481 PetscCallMPI(MPI_Comm_size(comm, &size)); 2482 2483 /* Calculate leaf array offsets */ 2484 PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets)); 2485 leafArrayOffsets[0] = 0; 2486 for (s = 0; s < nsfs; s++) { 2487 PetscInt nl; 2488 2489 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL)); 2490 leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl; 2491 } 2492 nLeaves = leafArrayOffsets[nsfs]; 2493 2494 /* Calculate number of roots */ 2495 switch (rootMode) { 2496 case PETSCSF_CONCATENATE_ROOTMODE_SHARED: { 2497 PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL)); 2498 if (PetscDefined(USE_DEBUG)) { 2499 for (s = 1; s < nsfs; s++) { 2500 PetscInt nr; 2501 2502 PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL)); 2503 PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots); 2504 } 2505 } 2506 } break; 2507 case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: { 2508 /* Calculate also global layout in this case */ 2509 PetscInt *nls; 2510 PetscLayout *lts; 2511 PetscInt **inds; 2512 PetscInt j; 2513 PetscInt rootOffset = 0; 2514 2515 PetscCall(PetscCalloc3(nsfs, <s, nsfs, &nls, nsfs, &inds)); 2516 PetscCall(PetscLayoutCreate(comm, &glayout)); 2517 glayout->bs = 1; 2518 glayout->n = 0; 2519 glayout->N = 0; 2520 for (s = 0; s < nsfs; s++) { 2521 PetscCall(PetscSFGetGraphLayout(sfs[s], <s[s], &nls[s], NULL, &inds[s])); 2522 glayout->n += lts[s]->n; 2523 glayout->N += lts[s]->N; 2524 } 2525 PetscCall(PetscLayoutSetUp(glayout)); 2526 PetscCall(PetscMalloc1(nLeaves, &gremote)); 2527 for (s = 0, j = 0; s < nsfs; s++) { 2528 for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset; 2529 rootOffset += lts[s]->N; 2530 PetscCall(PetscLayoutDestroy(<s[s])); 2531 PetscCall(PetscFree(inds[s])); 2532 } 2533 PetscCall(PetscFree3(lts, nls, inds)); 2534 nRoots = glayout->N; 2535 } break; 2536 case PETSCSF_CONCATENATE_ROOTMODE_LOCAL: 2537 /* nRoots calculated later in this case */ 2538 break; 2539 default: 2540 SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode); 2541 } 2542 2543 if (!leafOffsets) { 2544 all_ilocal_null = PETSC_TRUE; 2545 for (s = 0; s < nsfs; s++) { 2546 const PetscInt *ilocal; 2547 2548 PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL)); 2549 if (ilocal) { 2550 all_ilocal_null = PETSC_FALSE; 2551 break; 2552 } 2553 } 2554 PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL"); 2555 } 2556 2557 /* Renumber and concatenate local leaves */ 2558 ilocal_new = NULL; 2559 if (!all_ilocal_null) { 2560 PetscCall(PetscMalloc1(nLeaves, &ilocal_new)); 2561 for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1; 2562 for (s = 0; s < nsfs; s++) { 2563 const PetscInt 
*ilocal; 2564 PetscInt *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]); 2565 PetscInt i, nleaves_l; 2566 2567 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL)); 2568 for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s]; 2569 } 2570 } 2571 2572 /* Renumber and concatenate remote roots */ 2573 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) { 2574 PetscInt rootOffset = 0; 2575 2576 PetscCall(PetscMalloc1(nLeaves, &iremote_new)); 2577 for (i = 0; i < nLeaves; i++) { 2578 iremote_new[i].rank = -1; 2579 iremote_new[i].index = -1; 2580 } 2581 for (s = 0; s < nsfs; s++) { 2582 PetscInt i, nl, nr; 2583 PetscSF tmp_sf; 2584 const PetscSFNode *iremote; 2585 PetscSFNode *tmp_rootdata; 2586 PetscSFNode *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]); 2587 2588 PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote)); 2589 PetscCall(PetscSFCreate(comm, &tmp_sf)); 2590 /* create helper SF with contiguous leaves */ 2591 PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 2592 PetscCall(PetscSFSetUp(tmp_sf)); 2593 PetscCall(PetscMalloc1(nr, &tmp_rootdata)); 2594 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) { 2595 for (i = 0; i < nr; i++) { 2596 tmp_rootdata[i].index = i + rootOffset; 2597 tmp_rootdata[i].rank = rank; 2598 } 2599 rootOffset += nr; 2600 } else { 2601 for (i = 0; i < nr; i++) { 2602 tmp_rootdata[i].index = i; 2603 tmp_rootdata[i].rank = rank; 2604 } 2605 } 2606 PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2607 PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2608 PetscCall(PetscSFDestroy(&tmp_sf)); 2609 PetscCall(PetscFree(tmp_rootdata)); 2610 } 2611 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above 2612 2613 /* Build the new SF */ 2614 PetscCall(PetscSFCreate(comm, newsf)); 2615 PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER)); 2616 } else { 2617 /* Build the new SF */ 2618 PetscCall(PetscSFCreate(comm, newsf)); 2619 PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote)); 2620 } 2621 PetscCall(PetscSFSetUp(*newsf)); 2622 PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view")); 2623 PetscCall(PetscLayoutDestroy(&glayout)); 2624 PetscCall(PetscFree(gremote)); 2625 PetscCall(PetscFree(leafArrayOffsets)); 2626 PetscFunctionReturn(PETSC_SUCCESS); 2627 } 2628 2629 /*@ 2630 PetscSFRegisterPersistent - Register root and leaf data as memory regions that will be used for repeated `PetscSF` communications. 2631 2632 Collective 2633 2634 Input Parameters: 2635 + sf - star forest 2636 . unit - the data type contained within `rootdata` and `leafdata` 2637 . rootdata - root data that will be used for multiple `PetscSF` communications 2638 - leafdata - leaf data that will be used for multiple `PetscSF` communications 2639 2640 Level: advanced 2641 2642 Notes: 2643 Implementations of `PetscSF` can make optimizations 2644 for repeated communication using the same memory regions, but these optimizations 2645 can be unsound if `rootdata` or `leafdata` is deallocated and the `PetscSF` is not informed. 
2646 The intended pattern is 2647 2648 .vb 2649 PetscMalloc2(nroots, &rootdata, nleaves, &leafdata); 2650 2651 PetscSFRegisterPersistent(sf, unit, rootdata, leafdata); 2652 // repeated use of rootdata and leafdata will now be optimized 2653 2654 PetscSFBcastBegin(sf, unit, rootdata, leafdata, MPI_REPLACE); 2655 PetscSFBcastEnd(sf, unit, rootdata, leafdata, MPI_REPLACE); 2656 // ... 2657 PetscSFReduceBegin(sf, unit, leafdata, rootdata, MPI_SUM); 2658 PetscSFReduceEnd(sf, unit, leafdata, rootdata, MPI_SUM); 2659 // ... (other communications) 2660 2661 // rootdata and leafdata must be deregistered before freeing 2662 // skipping this can lead to undefined behavior including 2663 // deadlocks 2664 PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata); 2665 2666 // it is now safe to free rootdata and leafdata 2667 PetscFree2(rootdata, leafdata); 2668 .ve 2669 2670 If you do not register `rootdata` and `leafdata` it will not cause an error, 2671 but optimizations that reduce the setup time for each communication cannot be 2672 made. Currently, the only implementation of `PetscSF` that benefits from 2673 `PetscSFRegisterPersistent()` is `PETSCSFWINDOW`. For the default 2674 `PETSCSFBASIC` there is no benefit to using `PetscSFRegisterPersistent()`. 2675 2676 .seealso: [](sec_petscsf), `PetscSF`, `PETSCSFWINDOW`, `PetscSFDeregisterPersistent()` 2677 @*/ 2678 PetscErrorCode PetscSFRegisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata) 2679 { 2680 PetscFunctionBegin; 2681 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2682 PetscTryMethod(sf, "PetscSFRegisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata)); 2683 PetscFunctionReturn(PETSC_SUCCESS); 2684 } 2685 2686 /*@ 2687 PetscSFDeregisterPersistent - Signal that repeated usage of `rootdata` and `leafdata` for `PetscSF` communication has concluded. 2688 2689 Collective 2690 2691 Input Parameters: 2692 + sf - star forest 2693 . unit - the data type contained within the `rootdata` and `leafdata` 2694 . rootdata - root data that was previously registered with `PetscSFRegisterPersistent()` 2695 - leafdata - leaf data that was previously registered with `PetscSFRegisterPersistent()` 2696 2697 Level: advanced 2698 2699 Note: 2700 See `PetscSFRegisterPersistent()` for when and how to use this function. 2701 2702 .seealso: [](sec_petscsf), `PetscSF`, `PETSCSFWINDOW`, `PetscSFRegisterPersistent()` 2703 @*/ 2704 PetscErrorCode PetscSFDeregisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata) 2705 { 2706 PetscFunctionBegin; 2707 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2708 PetscTryMethod(sf, "PetscSFDeregisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata)); 2709 PetscFunctionReturn(PETSC_SUCCESS); 2710 } 2711 2712 PETSC_INTERN PetscErrorCode PetscSFGetDatatypeSize_Internal(MPI_Comm comm, MPI_Datatype unit, MPI_Aint *size) 2713 { 2714 MPI_Aint lb, lb_true, bytes, bytes_true; 2715 2716 PetscFunctionBegin; 2717 PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes)); 2718 PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true)); 2719 PetscCheck(lb == 0 && lb_true == 0, comm, PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature"); 2720 *size = bytes; 2721 PetscFunctionReturn(PETSC_SUCCESS); 2722 } 2723
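/* Illustrative sketch (not compiled into the library): a typical PetscSFConcatenate() call for two
   star forests that both use contiguous leaf storage (ilocal = NULL), so leafOffsets may be NULL.
   The names comm, sf0, sf1, and newsf are placeholders for this example only.

     PetscSF sfs[2] = {sf0, sf1};
     PetscSF newsf;

     PetscCall(PetscSFConcatenate(comm, 2, sfs, PETSCSF_CONCATENATE_ROOTMODE_LOCAL, NULL, &newsf));
     // ... communicate with newsf ...
     PetscCall(PetscSFDestroy(&newsf));
*/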