#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/hashseti.h>
#include <petsc/private/viewerimpl.h>
#include <petsc/private/hashmapi.h>

#if defined(PETSC_HAVE_CUDA)
  #include <petscdevice_cuda.h>
#endif

#if defined(PETSC_HAVE_HIP)
  #include <hip/hip_runtime.h>
#endif

#if defined(PETSC_CLANG_STATIC_ANALYZER)
extern void PetscSFCheckGraphSet(PetscSF, int);
#else
  #if defined(PETSC_USE_DEBUG)
    #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME)
  #else
    #define PetscSFCheckGraphSet(sf, arg) \
      do { \
      } while (0)
  #endif
#endif

const char *const PetscSFDuplicateOptions[]     = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL};

/*@
  PetscSFCreate - create a star forest communication context

  Collective

  Input Parameter:
. comm - communicator on which the star forest will operate

  Output Parameter:
. sf - new star forest context

  Options Database Keys:
+ -sf_type basic                 - Use MPI persistent Isend/Irecv for communication (Default)
. -sf_type window                - Use MPI-3 one-sided window for communication
. -sf_type neighbor              - Use MPI-3 neighborhood collectives for communication
- -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor)

  Level: intermediate

  Note:
  When one knows the communication graph is one of the predefined graphs, such as those of `MPI_Alltoall()`, `MPI_Allgatherv()`, and
  `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
  `PetscSF`s are optimized and have better performance than the general `PetscSF`s.
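  Example Usage:
  A minimal sketch of the typical lifecycle, assuming the graph is described elsewhere with `PetscSFSetGraph()` or `PetscSFSetGraphWithPattern()`;
.vb
  PetscSF sf;

  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  PetscCall(PetscSFSetFromOptions(sf));
  /* ... describe the graph, then communicate with e.g. PetscSFBcastBegin()/PetscSFBcastEnd() ... */
  PetscCall(PetscSFDestroy(&sf));
.ve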
52 53 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()` 54 @*/ 55 PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf) 56 { 57 PetscSF b; 58 59 PetscFunctionBegin; 60 PetscAssertPointer(sf, 2); 61 PetscCall(PetscSFInitializePackage()); 62 63 PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView)); 64 b->nroots = -1; 65 b->nleaves = -1; 66 b->minleaf = PETSC_INT_MAX; 67 b->maxleaf = PETSC_INT_MIN; 68 b->nranks = -1; 69 b->rankorder = PETSC_TRUE; 70 b->ingroup = MPI_GROUP_NULL; 71 b->outgroup = MPI_GROUP_NULL; 72 b->graphset = PETSC_FALSE; 73 #if defined(PETSC_HAVE_DEVICE) 74 b->use_gpu_aware_mpi = use_gpu_aware_mpi; 75 b->use_stream_aware_mpi = PETSC_FALSE; 76 b->unknown_input_stream = PETSC_FALSE; 77 #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 78 b->backend = PETSCSF_BACKEND_KOKKOS; 79 #elif defined(PETSC_HAVE_CUDA) 80 b->backend = PETSCSF_BACKEND_CUDA; 81 #elif defined(PETSC_HAVE_HIP) 82 b->backend = PETSCSF_BACKEND_HIP; 83 #endif 84 85 #if defined(PETSC_HAVE_NVSHMEM) 86 b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */ 87 b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */ 88 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL)); 89 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL)); 90 #endif 91 #endif 92 b->vscat.from_n = -1; 93 b->vscat.to_n = -1; 94 b->vscat.unit = MPIU_SCALAR; 95 *sf = b; 96 PetscFunctionReturn(PETSC_SUCCESS); 97 } 98 99 /*@ 100 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 101 102 Collective 103 104 Input Parameter: 105 . sf - star forest 106 107 Level: advanced 108 109 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()` 110 @*/ 111 PetscErrorCode PetscSFReset(PetscSF sf) 112 { 113 PetscFunctionBegin; 114 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 115 PetscTryTypeMethod(sf, Reset); 116 PetscCall(PetscSFDestroy(&sf->rankssf)); 117 118 sf->nroots = -1; 119 sf->nleaves = -1; 120 sf->minleaf = PETSC_INT_MAX; 121 sf->maxleaf = PETSC_INT_MIN; 122 sf->mine = NULL; 123 sf->remote = NULL; 124 sf->graphset = PETSC_FALSE; 125 PetscCall(PetscFree(sf->mine_alloc)); 126 PetscCall(PetscFree(sf->remote_alloc)); 127 sf->nranks = -1; 128 PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote)); 129 sf->degreeknown = PETSC_FALSE; 130 PetscCall(PetscFree(sf->degree)); 131 if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup)); 132 if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup)); 133 134 if (sf->multi) sf->multi->multi = NULL; 135 PetscCall(PetscSFDestroy(&sf->multi)); 136 137 PetscCall(PetscLayoutDestroy(&sf->map)); 138 139 #if defined(PETSC_HAVE_DEVICE) 140 for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i])); 141 #endif 142 143 sf->setupcalled = PETSC_FALSE; 144 PetscFunctionReturn(PETSC_SUCCESS); 145 } 146 147 /*@ 148 PetscSFSetType - Set the `PetscSF` communication implementation 149 150 Collective 151 152 Input Parameters: 153 + sf - the `PetscSF` context 154 - type - a known method 155 .vb 156 PETSCSFWINDOW - MPI-2/3 one-sided 157 PETSCSFBASIC - basic implementation using MPI-1 two-sided 158 .ve 159 160 Options Database Key: 161 . 
-sf_type <type> - Sets the method; for example `basic` or `window` use -help for a list of available methods 162 163 Level: intermediate 164 165 Notes: 166 See `PetscSFType` for possible values 167 168 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()` 169 @*/ 170 PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type) 171 { 172 PetscBool match; 173 PetscErrorCode (*r)(PetscSF); 174 175 PetscFunctionBegin; 176 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 177 PetscAssertPointer(type, 2); 178 179 PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match)); 180 if (match) PetscFunctionReturn(PETSC_SUCCESS); 181 182 PetscCall(PetscFunctionListFind(PetscSFList, type, &r)); 183 PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type); 184 /* Destroy the previous PetscSF implementation context */ 185 PetscTryTypeMethod(sf, Destroy); 186 PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops))); 187 PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type)); 188 PetscCall((*r)(sf)); 189 PetscFunctionReturn(PETSC_SUCCESS); 190 } 191 192 /*@ 193 PetscSFGetType - Get the `PetscSF` communication implementation 194 195 Not Collective 196 197 Input Parameter: 198 . sf - the `PetscSF` context 199 200 Output Parameter: 201 . type - the `PetscSF` type name 202 203 Level: intermediate 204 205 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()` 206 @*/ 207 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 208 { 209 PetscFunctionBegin; 210 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 211 PetscAssertPointer(type, 2); 212 *type = ((PetscObject)sf)->type_name; 213 PetscFunctionReturn(PETSC_SUCCESS); 214 } 215 216 /*@ 217 PetscSFDestroy - destroy a star forest 218 219 Collective 220 221 Input Parameter: 222 . sf - address of star forest 223 224 Level: intermediate 225 226 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()` 227 @*/ 228 PetscErrorCode PetscSFDestroy(PetscSF *sf) 229 { 230 PetscFunctionBegin; 231 if (!*sf) PetscFunctionReturn(PETSC_SUCCESS); 232 PetscValidHeaderSpecific(*sf, PETSCSF_CLASSID, 1); 233 if (--((PetscObject)*sf)->refct > 0) { 234 *sf = NULL; 235 PetscFunctionReturn(PETSC_SUCCESS); 236 } 237 PetscCall(PetscSFReset(*sf)); 238 PetscTryTypeMethod(*sf, Destroy); 239 PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf)); 240 if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit)); 241 #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM) 242 if ((*sf)->use_stream_aware_mpi) { 243 PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream)); 244 PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm)); 245 } 246 #endif 247 PetscCall(PetscHeaderDestroy(sf)); 248 PetscFunctionReturn(PETSC_SUCCESS); 249 } 250 251 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 252 { 253 PetscInt i, nleaves; 254 PetscMPIInt size; 255 const PetscInt *ilocal; 256 const PetscSFNode *iremote; 257 258 PetscFunctionBegin; 259 if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS); 260 PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote)); 261 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 262 for (i = 0; i < nleaves; i++) { 263 const PetscInt rank = iremote[i].rank; 264 const PetscInt remote = iremote[i].index; 265 const PetscInt leaf = ilocal ? 
ilocal[i] : i;
    PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
    PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
    PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFSetUp - set up communication structures for a `PetscSF`; after this is done the `PetscSF` may be used to perform communication

  Collective

  Input Parameter:
. sf - star forest communication object

  Level: beginner

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
@*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
  PetscCall(PetscSFCheckGraphValid_Private(sf));
  if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
  PetscTryTypeMethod(sf, SetUp);
#if defined(PETSC_HAVE_CUDA)
  if (sf->backend == PETSCSF_BACKEND_CUDA) {
    sf->ops->Malloc = PetscSFMalloc_CUDA;
    sf->ops->Free   = PetscSFFree_CUDA;
  }
#endif
#if defined(PETSC_HAVE_HIP)
  if (sf->backend == PETSCSF_BACKEND_HIP) {
    sf->ops->Malloc = PetscSFMalloc_HIP;
    sf->ops->Free   = PetscSFFree_HIP;
  }
#endif

#if defined(PETSC_HAVE_KOKKOS)
  if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
    sf->ops->Malloc = PetscSFMalloc_Kokkos;
    sf->ops->Free   = PetscSFFree_Kokkos;
  }
#endif
  PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFSetFromOptions - set `PetscSF` options using the options database

  Logically Collective

  Input Parameter:
. sf - star forest

  Options Database Keys:
+ -sf_type - implementation type, see `PetscSFSetType()`
. -sf_rank_order - sort composite points for gathers and scatters in MPI rank order; gathers are non-deterministic otherwise
. -sf_use_default_stream - Assume callers of `PetscSF` computed the input root/leafdata with the default CUDA stream. `PetscSF` will also
                           use the default stream to process data. Therefore, no stream synchronization is needed between `PetscSF` and its caller (default: true).
                           If true, this option only works with `-use_gpu_aware_mpi 1`.
. -sf_use_stream_aware_mpi - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false).
                             If true, this option only works with `-use_gpu_aware_mpi 1`.
- -sf_backend <cuda,hip,kokkos> - Select the device backend `PetscSF` uses. Currently `PetscSF` has these backends: cuda, hip, and kokkos.
                                  On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
                                  the only available backend is kokkos.
339 340 Level: intermediate 341 342 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()` 343 @*/ 344 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 345 { 346 PetscSFType deft; 347 char type[256]; 348 PetscBool flg; 349 350 PetscFunctionBegin; 351 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 352 PetscObjectOptionsBegin((PetscObject)sf); 353 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 354 PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg)); 355 PetscCall(PetscSFSetType(sf, flg ? type : deft)); 356 PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL)); 357 PetscCall(PetscOptionsBool("-sf_monitor", "monitor the MPI communication in sf", NULL, sf->monitor, &sf->monitor, NULL)); 358 #if defined(PETSC_HAVE_DEVICE) 359 { 360 char backendstr[32] = {0}; 361 PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set; 362 /* Change the defaults set in PetscSFCreate() with command line options */ 363 PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL)); 364 PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL)); 365 PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set)); 366 PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda)); 367 PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos)); 368 PetscCall(PetscStrcasecmp("hip", backendstr, &isHip)); 369 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 370 if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 371 else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 372 else if (isHip) sf->backend = PETSCSF_BACKEND_HIP; 373 else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr); 374 #elif defined(PETSC_HAVE_KOKKOS) 375 PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. 
You can only choose kokkos", backendstr);
#endif

#if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
    if (sf->use_stream_aware_mpi) {
      MPI_Info info;

      PetscCallMPI(MPI_Info_create(&info));
      PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t"));
      PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream)));
      PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream));
      PetscCallMPI(MPI_Info_free(&info));
      PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm));
    }
#endif
  }
#endif
  PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFSetRankOrder - sort multi-points for gathers and scatters by MPI rank order

  Logically Collective

  Input Parameters:
+ sf - star forest
- flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (false has a lower setup cost, but is non-deterministic)

  Level: advanced

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveBool(sf, flg, 2);
  PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFSetGraph - Set a parallel star forest

  Collective

  Input Parameters:
+ sf - star forest
. nroots - number of root vertices on the current MPI process (these are possible targets for other processes to attach leaves)
. nleaves - number of leaf vertices on the current MPI process, each of these references a root on any process
. ilocal - locations of leaves in leafdata buffers (locations must be >= 0, enforced during setup in debug mode), pass `NULL` for contiguous storage (same as passing (0, 1, 2, ..., nleaves-1))
. localmode - copy mode for `ilocal`
. iremote - remote locations of root vertices for each leaf on the current process, length is `nleaves` (locations must be >= 0, enforced during setup in debug mode)
- remotemode - copy mode for `iremote`

  Level: intermediate

  Notes:
  Leaf indices in `ilocal` must be unique, otherwise an error occurs.

  Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics.
  In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
  PETSc might modify the respective array;
  if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`.
  If `PETSC_COPY_VALUES` is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).

  `PetscSFSetGraphWithPattern()` is an alternative approach to provide certain communication patterns that have extra optimizations.

  Fortran Notes:
  In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`.

  Developer Notes:
  We sort leaves to check for duplicates and contiguity and to find minleaf/maxleaf.
  This also makes it easy to compare the leaf sets of two `PetscSF`s.
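  Example Usage:
  A minimal sketch, in which each MPI process owns two roots and has two contiguous leaves (`ilocal` = `NULL`) that reference the two roots of the next process in a ring;
.vb
  PetscSF     sf;
  PetscSFNode remote[2];
  PetscMPIInt rank, size;

  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  for (PetscInt i = 0; i < 2; i++) {
    remote[i].rank  = (rank + 1) % size; /* MPI rank owning the root */
    remote[i].index = i;                 /* root index on that rank  */
  }
  PetscCall(PetscSFSetGraph(sf, 2, 2, NULL, PETSC_COPY_VALUES, remote, PETSC_COPY_VALUES));
  /* ... communicate, then PetscSFDestroy() ... */
.ve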
453 454 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`, `PetscSFSetGraphWithPattern()` 455 @*/ 456 PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt ilocal[], PetscCopyMode localmode, PetscSFNode iremote[], PetscCopyMode remotemode) 457 { 458 PetscBool unique, contiguous; 459 460 PetscFunctionBegin; 461 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 462 if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4); 463 if (nleaves > 0) PetscAssertPointer(iremote, 6); 464 PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots); 465 PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves); 466 /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast 467 * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */ 468 PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode); 469 PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode); 470 471 if (sf->nroots >= 0) { /* Reset only if graph already set */ 472 PetscCall(PetscSFReset(sf)); 473 } 474 475 PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0)); 476 if (PetscDefined(USE_DEBUG)) { 477 PetscMPIInt size; 478 479 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 480 for (PetscInt i = 0; i < nleaves; i++) PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values"); 481 } 482 483 sf->nroots = nroots; 484 sf->nleaves = nleaves; 485 486 if (localmode == PETSC_COPY_VALUES && ilocal) { 487 PetscInt *tlocal = NULL; 488 489 PetscCall(PetscMalloc1(nleaves, &tlocal)); 490 PetscCall(PetscArraycpy(tlocal, ilocal, nleaves)); 491 ilocal = tlocal; 492 } 493 if (remotemode == PETSC_COPY_VALUES) { 494 PetscSFNode *tremote = NULL; 495 496 PetscCall(PetscMalloc1(nleaves, &tremote)); 497 PetscCall(PetscArraycpy(tremote, iremote, nleaves)); 498 iremote = tremote; 499 } 500 501 if (nleaves && ilocal) { 502 PetscSFNode work; 503 504 PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work)); 505 PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique)); 506 unique = PetscNot(unique); 507 PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF"); 508 sf->minleaf = ilocal[0]; 509 sf->maxleaf = ilocal[nleaves - 1]; 510 contiguous = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1); 511 } else { 512 sf->minleaf = 0; 513 sf->maxleaf = nleaves - 1; 514 unique = PETSC_TRUE; 515 contiguous = PETSC_TRUE; 516 } 517 518 if (contiguous) { 519 if (localmode == PETSC_USE_POINTER) { 520 ilocal = NULL; 521 } else { 522 PetscCall(PetscFree(ilocal)); 523 } 524 } 525 sf->mine = ilocal; 526 if (localmode == PETSC_USE_POINTER) { 527 sf->mine_alloc = NULL; 528 } else { 529 sf->mine_alloc = ilocal; 530 } 531 if (PetscDefined(USE_DEBUG)) { 532 PetscMPIInt size; 533 534 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 535 for (PetscInt i = 0; i < nleaves; i++) PetscCheck(iremote[i].rank >= -1 && 
iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values");
  }
  sf->remote = iremote;
  if (remotemode == PETSC_USE_POINTER) {
    sf->remote_alloc = NULL;
  } else {
    sf->remote_alloc = iremote;
  }
  PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern

  Collective

  Input Parameters:
+ sf - The `PetscSF`
. map - Layout of roots over all processes (not used when pattern is `PETSCSF_PATTERN_ALLTOALL`)
- pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`

  Level: intermediate

  Notes:
  It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `root` and its `PetscLayout` is `map`.
  `n` and `N` are the local and global sizes of `root` respectively.

  With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph such that a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` pair on it copies `root` to
  sequential vectors `leaves` on all MPI processes.

  With `PETSCSF_PATTERN_GATHER`, the routine creates a graph such that a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` pair on it copies `root` to a
  sequential vector `leaves` on MPI rank 0.

  With `PETSCSF_PATTERN_ALLTOALL`, `map` is not used. Suppose NP is the size of `sf`'s communicator. The routine
  creates a graph where every MPI process has NP leaves and NP roots. On MPI rank i, its leaf j is connected to root i
  of rank j, where 0 <= i, j < NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that this does
  not mean one cannot send multiple items. One needs to create a new MPI datatype for the multiple data
  items with `MPI_Type_contiguous()` and use that as the `unit` argument in the `PetscSF` routines. In this case, roots and leaves are symmetric.
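  Example Usage:
  A minimal sketch of the allgather pattern, assuming each MPI process contributes two roots through a layout built on the spot;
.vb
  PetscSF     sf;
  PetscLayout map;

  PetscCall(PetscLayoutCreate(PETSC_COMM_WORLD, &map));
  PetscCall(PetscLayoutSetLocalSize(map, 2));
  PetscCall(PetscLayoutSetUp(map));
  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  PetscCall(PetscSFSetGraphWithPattern(sf, map, PETSCSF_PATTERN_ALLGATHER));
  /* a PetscSFBcastBegin()/PetscSFBcastEnd() pair now replicates the two local roots on every process */
  PetscCall(PetscLayoutDestroy(&map));
.ve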
575 576 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()` 577 @*/ 578 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern) 579 { 580 MPI_Comm comm; 581 PetscInt n, N, res[2]; 582 PetscMPIInt rank, size; 583 PetscSFType type; 584 585 PetscFunctionBegin; 586 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 587 PetscValidLogicalCollectiveEnum(sf, pattern, 3); 588 if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2); 589 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 590 PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern); 591 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 592 PetscCallMPI(MPI_Comm_size(comm, &size)); 593 594 if (pattern == PETSCSF_PATTERN_ALLTOALL) { 595 PetscInt sizei = size; 596 597 type = PETSCSFALLTOALL; 598 PetscCall(PetscLayoutCreate(comm, &sf->map)); 599 PetscCall(PetscLayoutSetLocalSize(sf->map, size)); 600 PetscCall(PetscLayoutSetSize(sf->map, PetscSqr(sizei))); 601 PetscCall(PetscLayoutSetUp(sf->map)); 602 } else { 603 PetscCall(PetscLayoutGetLocalSize(map, &n)); 604 PetscCall(PetscLayoutGetSize(map, &N)); 605 res[0] = n; 606 res[1] = -n; 607 /* Check if n are same over all ranks so that we can optimize it */ 608 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm)); 609 if (res[0] == -res[1]) { /* same n */ 610 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 611 } else { 612 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 613 } 614 PetscCall(PetscLayoutReference(map, &sf->map)); 615 } 616 PetscCall(PetscSFSetType(sf, type)); 617 618 sf->pattern = pattern; 619 sf->mine = NULL; /* Contiguous */ 620 621 /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 622 Also set other easy stuff. 623 */ 624 if (pattern == PETSCSF_PATTERN_ALLGATHER) { 625 sf->nleaves = N; 626 sf->nroots = n; 627 sf->nranks = size; 628 sf->minleaf = 0; 629 sf->maxleaf = N - 1; 630 } else if (pattern == PETSCSF_PATTERN_GATHER) { 631 sf->nleaves = rank ? 0 : N; 632 sf->nroots = n; 633 sf->nranks = rank ? 0 : size; 634 sf->minleaf = 0; 635 sf->maxleaf = rank ? -1 : N - 1; 636 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 637 sf->nleaves = size; 638 sf->nroots = size; 639 sf->nranks = size; 640 sf->minleaf = 0; 641 sf->maxleaf = size - 1; 642 } 643 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 644 sf->graphset = PETSC_TRUE; 645 PetscFunctionReturn(PETSC_SUCCESS); 646 } 647 648 /*@ 649 PetscSFCreateInverseSF - given a `PetscSF` in which all roots have degree 1 (exactly one leaf), creates the inverse map 650 651 Collective 652 653 Input Parameter: 654 . sf - star forest to invert 655 656 Output Parameter: 657 . isf - inverse of `sf` 658 659 Level: advanced 660 661 Notes: 662 All roots must have degree 1. 663 664 The local space may be a permutation, but cannot be sparse. 
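  Example Usage:
  A minimal sketch, assuming `sf` already has a graph in which every root has exactly one leaf;
.vb
  PetscSF isf;

  PetscCall(PetscSFCreateInverseSF(sf, &isf));
  /* roots of isf are the leaves of sf and vice versa */
  PetscCall(PetscSFDestroy(&isf));
.ve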
665 666 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFSetGraph()` 667 @*/ 668 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf) 669 { 670 PetscMPIInt rank; 671 PetscInt i, nroots, nleaves, maxlocal, count, *newilocal; 672 const PetscInt *ilocal; 673 PetscSFNode *roots, *leaves; 674 675 PetscFunctionBegin; 676 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 677 PetscSFCheckGraphSet(sf, 1); 678 PetscAssertPointer(isf, 2); 679 680 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL)); 681 maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */ 682 683 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 684 PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves)); 685 for (i = 0; i < maxlocal; i++) { 686 leaves[i].rank = rank; 687 leaves[i].index = i; 688 } 689 for (i = 0; i < nroots; i++) { 690 roots[i].rank = -1; 691 roots[i].index = -1; 692 } 693 PetscCall(PetscSFReduceBegin(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE)); 694 PetscCall(PetscSFReduceEnd(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE)); 695 696 /* Check whether our leaves are sparse */ 697 for (i = 0, count = 0; i < nroots; i++) 698 if (roots[i].rank >= 0) count++; 699 if (count == nroots) newilocal = NULL; 700 else { // Index for sparse leaves and compact "roots" array (which is to become our leaves). 701 PetscCall(PetscMalloc1(count, &newilocal)); 702 for (i = 0, count = 0; i < nroots; i++) { 703 if (roots[i].rank >= 0) { 704 newilocal[count] = i; 705 roots[count].rank = roots[i].rank; 706 roots[count].index = roots[i].index; 707 count++; 708 } 709 } 710 } 711 712 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf)); 713 PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES)); 714 PetscCall(PetscFree2(roots, leaves)); 715 PetscFunctionReturn(PETSC_SUCCESS); 716 } 717 718 /*@ 719 PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph 720 721 Collective 722 723 Input Parameters: 724 + sf - communication object to duplicate 725 - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`) 726 727 Output Parameter: 728 . 
newsf - new communication object 729 730 Level: beginner 731 732 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()` 733 @*/ 734 PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf) 735 { 736 PetscSFType type; 737 MPI_Datatype dtype = MPIU_SCALAR; 738 739 PetscFunctionBegin; 740 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 741 PetscValidLogicalCollectiveEnum(sf, opt, 2); 742 PetscAssertPointer(newsf, 3); 743 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf)); 744 PetscCall(PetscSFGetType(sf, &type)); 745 if (type) PetscCall(PetscSFSetType(*newsf, type)); 746 (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */ 747 if (opt == PETSCSF_DUPLICATE_GRAPH) { 748 PetscSFCheckGraphSet(sf, 1); 749 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 750 PetscInt nroots, nleaves; 751 const PetscInt *ilocal; 752 const PetscSFNode *iremote; 753 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 754 PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 755 } else { 756 PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern)); 757 } 758 } 759 /* Since oldtype is committed, so is newtype, according to MPI */ 760 if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype)); 761 (*newsf)->vscat.bs = sf->vscat.bs; 762 (*newsf)->vscat.unit = dtype; 763 (*newsf)->vscat.to_n = sf->vscat.to_n; 764 (*newsf)->vscat.from_n = sf->vscat.from_n; 765 /* Do not copy lsf. Build it on demand since it is rarely used */ 766 767 #if defined(PETSC_HAVE_DEVICE) 768 (*newsf)->backend = sf->backend; 769 (*newsf)->unknown_input_stream = sf->unknown_input_stream; 770 (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi; 771 (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi; 772 #endif 773 PetscTryTypeMethod(sf, Duplicate, opt, *newsf); 774 /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */ 775 PetscFunctionReturn(PETSC_SUCCESS); 776 } 777 778 /*@C 779 PetscSFGetGraph - Get the graph specifying a parallel star forest 780 781 Not Collective 782 783 Input Parameter: 784 . sf - star forest 785 786 Output Parameters: 787 + nroots - number of root vertices on the current process (these are possible targets for other MPI process to attach leaves) 788 . nleaves - number of leaf vertices on the current process, each of these references a root on any MPI process 789 . 
ilocal - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage) 790 - iremote - remote locations of root vertices for each leaf on the current process 791 792 Level: intermediate 793 794 Notes: 795 We are not currently requiring that the graph is set, thus returning `nroots` = -1 if it has not been set yet 796 797 The returned `ilocal` and `iremote` might contain values in different order than the input ones in `PetscSFSetGraph()` 798 799 Fortran Note: 800 Use `PetscSFRestoreGraph()` when access to the arrays is no longer needed 801 802 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()` 803 @*/ 804 PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt *ilocal[], const PetscSFNode *iremote[]) 805 { 806 PetscFunctionBegin; 807 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 808 if (sf->ops->GetGraph) { 809 PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote)); 810 } else { 811 if (nroots) *nroots = sf->nroots; 812 if (nleaves) *nleaves = sf->nleaves; 813 if (ilocal) *ilocal = sf->mine; 814 if (iremote) *iremote = sf->remote; 815 } 816 PetscFunctionReturn(PETSC_SUCCESS); 817 } 818 819 /*@ 820 PetscSFGetLeafRange - Get the active leaf ranges 821 822 Not Collective 823 824 Input Parameter: 825 . sf - star forest 826 827 Output Parameters: 828 + minleaf - minimum active leaf on this MPI process. Returns 0 if there are no leaves. 829 - maxleaf - maximum active leaf on this MPI process. Returns -1 if there are no leaves. 830 831 Level: developer 832 833 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 834 @*/ 835 PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf) 836 { 837 PetscFunctionBegin; 838 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 839 PetscSFCheckGraphSet(sf, 1); 840 if (minleaf) *minleaf = sf->minleaf; 841 if (maxleaf) *maxleaf = sf->maxleaf; 842 PetscFunctionReturn(PETSC_SUCCESS); 843 } 844 845 /*@ 846 PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database 847 848 Collective 849 850 Input Parameters: 851 + A - the star forest 852 . 
obj - Optional object that provides the prefix for the option names 853 - name - command line option 854 855 Level: intermediate 856 857 Note: 858 See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat` 859 860 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()` 861 @*/ 862 PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[]) 863 { 864 PetscFunctionBegin; 865 PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1); 866 PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name)); 867 PetscFunctionReturn(PETSC_SUCCESS); 868 } 869 870 /*@ 871 PetscSFView - view a star forest 872 873 Collective 874 875 Input Parameters: 876 + sf - star forest 877 - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD` 878 879 Level: beginner 880 881 .seealso: [](sec_petscsf), `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()` 882 @*/ 883 PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer) 884 { 885 PetscBool isascii; 886 PetscViewerFormat format; 887 888 PetscFunctionBegin; 889 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 890 if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer)); 891 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 892 PetscCheckSameComm(sf, 1, viewer, 2); 893 if (sf->graphset) PetscCall(PetscSFSetUp(sf)); 894 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 895 if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) { 896 PetscMPIInt rank; 897 PetscInt j; 898 899 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer)); 900 PetscCall(PetscViewerASCIIPushTab(viewer)); 901 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 902 if (!sf->graphset) { 903 PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n")); 904 PetscCall(PetscViewerASCIIPopTab(viewer)); 905 PetscFunctionReturn(PETSC_SUCCESS); 906 } 907 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 908 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 909 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%d\n", rank, sf->nroots, sf->nleaves, sf->nranks)); 910 for (PetscInt i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? 
sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index)); 911 PetscCall(PetscViewerFlush(viewer)); 912 PetscCall(PetscViewerGetFormat(viewer, &format)); 913 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 914 PetscMPIInt *tmpranks, *perm; 915 916 PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm)); 917 PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks)); 918 for (PetscMPIInt i = 0; i < sf->nranks; i++) perm[i] = i; 919 PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm)); 920 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank)); 921 for (PetscMPIInt ii = 0; ii < sf->nranks; ii++) { 922 PetscMPIInt i = perm[ii]; 923 924 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i])); 925 for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j])); 926 } 927 PetscCall(PetscFree2(tmpranks, perm)); 928 } 929 PetscCall(PetscViewerFlush(viewer)); 930 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 931 } 932 PetscCall(PetscViewerASCIIPopTab(viewer)); 933 } 934 PetscTryTypeMethod(sf, View, viewer); 935 PetscFunctionReturn(PETSC_SUCCESS); 936 } 937 938 /*@C 939 PetscSFGetRootRanks - Get the root MPI ranks and number of vertices referenced by leaves on this process 940 941 Not Collective 942 943 Input Parameter: 944 . sf - star forest 945 946 Output Parameters: 947 + nranks - number of MPI processes referenced by local part 948 . ranks - [`nranks`] array of MPI ranks 949 . roffset - [`nranks`+1] offset in `rmine` and `rremote` for each MPI process 950 . rmine - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote MPI process, or `NULL` 951 - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote MPI process, or `NULL` 952 953 Level: developer 954 955 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetLeafRanks()` 956 @*/ 957 PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscMPIInt *nranks, const PetscMPIInt *ranks[], const PetscInt *roffset[], const PetscInt *rmine[], const PetscInt *rremote[]) 958 { 959 PetscFunctionBegin; 960 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 961 PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 962 if (sf->ops->GetRootRanks) { 963 PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote); 964 } else { 965 /* The generic implementation */ 966 if (nranks) *nranks = sf->nranks; 967 if (ranks) *ranks = sf->ranks; 968 if (roffset) *roffset = sf->roffset; 969 if (rmine) *rmine = sf->rmine; 970 if (rremote) *rremote = sf->rremote; 971 } 972 PetscFunctionReturn(PETSC_SUCCESS); 973 } 974 975 /*@C 976 PetscSFGetLeafRanks - Get leaf MPI ranks referencing roots on this process 977 978 Not Collective 979 980 Input Parameter: 981 . sf - star forest 982 983 Output Parameters: 984 + niranks - number of leaf MPI processes referencing roots on this process 985 . iranks - [`niranks`] array of MPI ranks 986 . 
ioffset - [`niranks`+1] offset in `irootloc` for each MPI process 987 - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf MPI process 988 989 Level: developer 990 991 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetRootRanks()` 992 @*/ 993 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscMPIInt *niranks, const PetscMPIInt *iranks[], const PetscInt *ioffset[], const PetscInt *irootloc[]) 994 { 995 PetscFunctionBegin; 996 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 997 PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 998 if (sf->ops->GetLeafRanks) { 999 PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc); 1000 } else { 1001 PetscSFType type; 1002 PetscCall(PetscSFGetType(sf, &type)); 1003 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 1004 } 1005 PetscFunctionReturn(PETSC_SUCCESS); 1006 } 1007 1008 static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list) 1009 { 1010 PetscInt i; 1011 for (i = 0; i < n; i++) { 1012 if (needle == list[i]) return PETSC_TRUE; 1013 } 1014 return PETSC_FALSE; 1015 } 1016 1017 /*@C 1018 PetscSFSetUpRanks - Set up data structures associated with MPI ranks; this is for internal use by `PetscSF` implementations. 1019 1020 Collective 1021 1022 Input Parameters: 1023 + sf - `PetscSF` to set up; `PetscSFSetGraph()` must have been called 1024 - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange) 1025 1026 Level: developer 1027 1028 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetRootRanks()` 1029 @*/ 1030 PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup) 1031 { 1032 PetscHMapI table; 1033 PetscHashIter pos; 1034 PetscMPIInt size, groupsize, *groupranks, *ranks; 1035 PetscInt *rcount; 1036 PetscInt irank, sfnrank, ranksi; 1037 PetscMPIInt i, orank = -1; 1038 1039 PetscFunctionBegin; 1040 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1041 PetscSFCheckGraphSet(sf, 1); 1042 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 1043 PetscCall(PetscHMapICreateWithSize(10, &table)); 1044 for (i = 0; i < sf->nleaves; i++) { 1045 /* Log 1-based rank */ 1046 PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES)); 1047 } 1048 PetscCall(PetscHMapIGetSize(table, &sfnrank)); 1049 PetscCall(PetscMPIIntCast(sfnrank, &sf->nranks)); 1050 PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote)); 1051 PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks)); 1052 PetscHashIterBegin(table, pos); 1053 for (i = 0; i < sf->nranks; i++) { 1054 PetscHashIterGetKey(table, pos, ranksi); 1055 PetscCall(PetscMPIIntCast(ranksi, &ranks[i])); 1056 PetscHashIterGetVal(table, pos, rcount[i]); 1057 PetscHashIterNext(table, pos); 1058 ranks[i]--; /* Convert back to 0-based */ 1059 } 1060 PetscCall(PetscHMapIDestroy(&table)); 1061 1062 /* We expect that dgroup is reliably "small" while nranks could be large */ 1063 { 1064 MPI_Group group = MPI_GROUP_NULL; 1065 PetscMPIInt *dgroupranks; 1066 1067 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 1068 PetscCallMPI(MPI_Group_size(dgroup, &groupsize)); 1069 PetscCall(PetscMalloc1(groupsize, &dgroupranks)); 1070 PetscCall(PetscMalloc1(groupsize, &groupranks)); 1071 for (i = 0; i < groupsize; i++) dgroupranks[i] 
= i; 1072 if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks)); 1073 PetscCallMPI(MPI_Group_free(&group)); 1074 PetscCall(PetscFree(dgroupranks)); 1075 } 1076 1077 /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */ 1078 for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) { 1079 for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */ 1080 if (InList(ranks[i], groupsize, groupranks)) break; 1081 } 1082 for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */ 1083 if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break; 1084 } 1085 if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */ 1086 PetscMPIInt tmprank; 1087 PetscInt tmpcount; 1088 1089 tmprank = ranks[i]; 1090 tmpcount = rcount[i]; 1091 ranks[i] = ranks[sf->ndranks]; 1092 rcount[i] = rcount[sf->ndranks]; 1093 ranks[sf->ndranks] = tmprank; 1094 rcount[sf->ndranks] = tmpcount; 1095 sf->ndranks++; 1096 } 1097 } 1098 PetscCall(PetscFree(groupranks)); 1099 PetscCall(PetscSortMPIIntWithIntArray(sf->ndranks, ranks, rcount)); 1100 if (rcount) PetscCall(PetscSortMPIIntWithIntArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks)); 1101 sf->roffset[0] = 0; 1102 for (i = 0; i < sf->nranks; i++) { 1103 PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i)); 1104 sf->roffset[i + 1] = sf->roffset[i] + rcount[i]; 1105 rcount[i] = 0; 1106 } 1107 for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) { 1108 /* short circuit */ 1109 if (orank != sf->remote[i].rank) { 1110 /* Search for index of iremote[i].rank in sf->ranks */ 1111 PetscCall(PetscMPIIntCast(sf->remote[i].rank, &orank)); 1112 PetscCall(PetscFindMPIInt(orank, sf->ndranks, sf->ranks, &irank)); 1113 if (irank < 0) { 1114 PetscCall(PetscFindMPIInt(orank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank)); 1115 if (irank >= 0) irank += sf->ndranks; 1116 } 1117 } 1118 PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %d in array", orank); 1119 sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i; 1120 sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index; 1121 rcount[irank]++; 1122 } 1123 PetscCall(PetscFree2(rcount, ranks)); 1124 PetscFunctionReturn(PETSC_SUCCESS); 1125 } 1126 1127 /*@C 1128 PetscSFGetGroups - gets incoming and outgoing process groups 1129 1130 Collective 1131 1132 Input Parameter: 1133 . 
sf - star forest 1134 1135 Output Parameters: 1136 + incoming - group of origin processes for incoming edges (leaves that reference my roots) 1137 - outgoing - group of destination processes for outgoing edges (roots that I reference) 1138 1139 Level: developer 1140 1141 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()` 1142 @*/ 1143 PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing) 1144 { 1145 MPI_Group group = MPI_GROUP_NULL; 1146 1147 PetscFunctionBegin; 1148 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1149 PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups"); 1150 if (sf->ingroup == MPI_GROUP_NULL) { 1151 PetscInt i; 1152 const PetscInt *indegree; 1153 PetscMPIInt rank, *outranks, *inranks, indegree0; 1154 PetscSFNode *remote; 1155 PetscSF bgcount; 1156 1157 /* Compute the number of incoming ranks */ 1158 PetscCall(PetscMalloc1(sf->nranks, &remote)); 1159 for (i = 0; i < sf->nranks; i++) { 1160 remote[i].rank = sf->ranks[i]; 1161 remote[i].index = 0; 1162 } 1163 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount)); 1164 PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER)); 1165 PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree)); 1166 PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree)); 1167 /* Enumerate the incoming ranks */ 1168 PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks)); 1169 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 1170 for (i = 0; i < sf->nranks; i++) outranks[i] = rank; 1171 PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks)); 1172 PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks)); 1173 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 1174 PetscCall(PetscMPIIntCast(indegree[0], &indegree0)); 1175 PetscCallMPI(MPI_Group_incl(group, indegree0, inranks, &sf->ingroup)); 1176 PetscCallMPI(MPI_Group_free(&group)); 1177 PetscCall(PetscFree2(inranks, outranks)); 1178 PetscCall(PetscSFDestroy(&bgcount)); 1179 } 1180 *incoming = sf->ingroup; 1181 1182 if (sf->outgroup == MPI_GROUP_NULL) { 1183 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 1184 PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup)); 1185 PetscCallMPI(MPI_Group_free(&group)); 1186 } 1187 *outgoing = sf->outgroup; 1188 PetscFunctionReturn(PETSC_SUCCESS); 1189 } 1190 1191 /*@ 1192 PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks 1193 1194 Collective 1195 1196 Input Parameter: 1197 . sf - star forest 1198 1199 Output Parameter: 1200 . 
rsf - the star forest with a single root per MPI process to perform communications 1201 1202 Level: developer 1203 1204 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()` 1205 @*/ 1206 PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf) 1207 { 1208 PetscFunctionBegin; 1209 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1210 PetscAssertPointer(rsf, 2); 1211 if (!sf->rankssf) { 1212 PetscSFNode *rremotes; 1213 const PetscMPIInt *ranks; 1214 PetscMPIInt nranks; 1215 1216 PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL)); 1217 PetscCall(PetscMalloc1(nranks, &rremotes)); 1218 for (PetscInt i = 0; i < nranks; i++) { 1219 rremotes[i].rank = ranks[i]; 1220 rremotes[i].index = 0; 1221 } 1222 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf)); 1223 PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER)); 1224 } 1225 *rsf = sf->rankssf; 1226 PetscFunctionReturn(PETSC_SUCCESS); 1227 } 1228 1229 /*@ 1230 PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters 1231 1232 Collective 1233 1234 Input Parameter: 1235 . sf - star forest that may contain roots with 0 or with more than 1 vertex 1236 1237 Output Parameter: 1238 . multi - star forest with split roots, such that each root has degree exactly 1 (has one leaf) 1239 1240 Level: developer 1241 1242 Note: 1243 In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi 1244 directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming 1245 edge, it is a candidate for future optimization that might involve its removal. 1246 1247 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()` 1248 @*/ 1249 PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi) 1250 { 1251 PetscFunctionBegin; 1252 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1253 PetscAssertPointer(multi, 2); 1254 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? 
*/ 1255 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi)); 1256 *multi = sf->multi; 1257 sf->multi->multi = sf->multi; 1258 PetscFunctionReturn(PETSC_SUCCESS); 1259 } 1260 if (!sf->multi) { 1261 const PetscInt *indegree; 1262 PetscInt i, *inoffset, *outones, *outoffset, maxlocal; 1263 PetscSFNode *remote; 1264 maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */ 1265 PetscCall(PetscSFComputeDegreeBegin(sf, &indegree)); 1266 PetscCall(PetscSFComputeDegreeEnd(sf, &indegree)); 1267 PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset)); 1268 inoffset[0] = 0; 1269 for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i]; 1270 for (i = 0; i < maxlocal; i++) outones[i] = 1; 1271 PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM)); 1272 PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM)); 1273 for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1274 if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */ 1275 for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp"); 1276 } 1277 PetscCall(PetscMalloc1(sf->nleaves, &remote)); 1278 for (i = 0; i < sf->nleaves; i++) { 1279 remote[i].rank = sf->remote[i].rank; 1280 remote[i].index = outoffset[sf->mine ? sf->mine[i] : i]; 1281 } 1282 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi)); 1283 sf->multi->multi = sf->multi; 1284 PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER)); 1285 if (sf->rankorder) { /* Sort the ranks */ 1286 PetscMPIInt rank; 1287 PetscInt *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree; 1288 PetscSFNode *newremote; 1289 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 1290 for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]); 1291 PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset)); 1292 for (i = 0; i < maxlocal; i++) outranks[i] = rank; 1293 PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE)); 1294 PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE)); 1295 /* Sort the incoming ranks at each vertex, build the inverse map */ 1296 for (i = 0; i < sf->nroots; i++) { 1297 PetscInt j; 1298 for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j; 1299 PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset)); 1300 for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1301 } 1302 PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE)); 1303 PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE)); 1304 PetscCall(PetscMalloc1(sf->nleaves, &newremote)); 1305 for (i = 0; i < sf->nleaves; i++) { 1306 newremote[i].rank = sf->remote[i].rank; 1307 newremote[i].index = newoutoffset[sf->mine ? 
sf->mine[i] : i]; 1308 } 1309 PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER)); 1310 PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset)); 1311 } 1312 PetscCall(PetscFree3(inoffset, outones, outoffset)); 1313 } 1314 *multi = sf->multi; 1315 PetscFunctionReturn(PETSC_SUCCESS); 1316 } 1317 1318 /*@C 1319 PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices 1320 1321 Collective 1322 1323 Input Parameters: 1324 + sf - original star forest 1325 . nselected - number of selected roots on this MPI process 1326 - selected - indices of the selected roots on this MPI process 1327 1328 Output Parameter: 1329 . esf - new star forest 1330 1331 Level: advanced 1332 1333 Note: 1334 To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can 1335 be done by calling PetscSFGetGraph(). 1336 1337 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1338 @*/ 1339 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt selected[], PetscSF *esf) 1340 { 1341 PetscInt i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal; 1342 const PetscInt *ilocal; 1343 signed char *rootdata, *leafdata, *leafmem; 1344 const PetscSFNode *iremote; 1345 PetscSFNode *new_iremote; 1346 MPI_Comm comm; 1347 1348 PetscFunctionBegin; 1349 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1350 PetscSFCheckGraphSet(sf, 1); 1351 if (nselected) PetscAssertPointer(selected, 3); 1352 PetscAssertPointer(esf, 4); 1353 1354 PetscCall(PetscSFSetUp(sf)); 1355 PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0)); 1356 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1357 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 1358 1359 if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */ 1360 PetscBool dups; 1361 PetscCall(PetscCheckDupsInt(nselected, selected, &dups)); 1362 PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups"); 1363 for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots); 1364 } 1365 1366 if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf); 1367 else { 1368 /* A generic version of creating embedded sf */ 1369 PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf)); 1370 maxlocal = maxleaf - minleaf + 1; 1371 PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem)); 1372 leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf); 1373 /* Tag selected roots and bcast to leaves */ 1374 for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1; 1375 PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1376 PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1377 1378 /* Build esf with leaves that are still connected */ 1379 esf_nleaves = 0; 1380 for (i = 0; i < nleaves; i++) { 1381 j = ilocal ? ilocal[i] : i; 1382 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1383 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1384 */ 1385 esf_nleaves += (leafdata[j] ? 
1 : 0); 1386 } 1387 PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal)); 1388 PetscCall(PetscMalloc1(esf_nleaves, &new_iremote)); 1389 for (i = n = 0; i < nleaves; i++) { 1390 j = ilocal ? ilocal[i] : i; 1391 if (leafdata[j]) { 1392 new_ilocal[n] = j; 1393 new_iremote[n].rank = iremote[i].rank; 1394 new_iremote[n].index = iremote[i].index; 1395 ++n; 1396 } 1397 } 1398 PetscCall(PetscSFCreate(comm, esf)); 1399 PetscCall(PetscSFSetFromOptions(*esf)); 1400 PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1401 PetscCall(PetscFree2(rootdata, leafmem)); 1402 } 1403 PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0)); 1404 PetscFunctionReturn(PETSC_SUCCESS); 1405 } 1406 1407 /*@C 1408 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices 1409 1410 Collective 1411 1412 Input Parameters: 1413 + sf - original star forest 1414 . nselected - number of selected leaves on this MPI process 1415 - selected - indices of the selected leaves on this MPI process 1416 1417 Output Parameter: 1418 . newsf - new star forest 1419 1420 Level: advanced 1421 1422 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1423 @*/ 1424 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt selected[], PetscSF *newsf) 1425 { 1426 const PetscSFNode *iremote; 1427 PetscSFNode *new_iremote; 1428 const PetscInt *ilocal; 1429 PetscInt i, nroots, *leaves, *new_ilocal; 1430 MPI_Comm comm; 1431 1432 PetscFunctionBegin; 1433 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1434 PetscSFCheckGraphSet(sf, 1); 1435 if (nselected) PetscAssertPointer(selected, 3); 1436 PetscAssertPointer(newsf, 4); 1437 1438 /* Uniq selected[] and put results in leaves[] */ 1439 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1440 PetscCall(PetscMalloc1(nselected, &leaves)); 1441 PetscCall(PetscArraycpy(leaves, selected, nselected)); 1442 PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves)); 1443 PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves); 1444 1445 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1446 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf); 1447 else { 1448 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote)); 1449 PetscCall(PetscMalloc1(nselected, &new_ilocal)); 1450 PetscCall(PetscMalloc1(nselected, &new_iremote)); 1451 for (i = 0; i < nselected; ++i) { 1452 const PetscInt l = leaves[i]; 1453 new_ilocal[i] = ilocal ? 
ilocal[l] : l; 1454 new_iremote[i].rank = iremote[l].rank; 1455 new_iremote[i].index = iremote[l].index; 1456 } 1457 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf)); 1458 PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1459 } 1460 PetscCall(PetscFree(leaves)); 1461 PetscFunctionReturn(PETSC_SUCCESS); 1462 } 1463 1464 /*@C 1465 PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()` 1466 1467 Collective 1468 1469 Input Parameters: 1470 + sf - star forest on which to communicate 1471 . unit - data type associated with each node 1472 . rootdata - buffer to broadcast 1473 - op - operation to use for reduction 1474 1475 Output Parameter: 1476 . leafdata - buffer to be reduced with values from each leaf's respective root 1477 1478 Level: intermediate 1479 1480 Note: 1481 When PETSc is configured with device support, it will use `PetscGetMemType()` to determine whether the given data pointers 1482 are host pointers or device pointers, which may incur a noticeable cost. If you already knew the memory type, you should 1483 use `PetscSFBcastWithMemTypeBegin()` instead. 1484 1485 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()` 1486 @*/ 1487 PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1488 { 1489 PetscMemType rootmtype, leafmtype; 1490 1491 PetscFunctionBegin; 1492 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1493 PetscCall(PetscSFSetUp(sf)); 1494 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1495 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1496 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1497 PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 1498 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1499 PetscFunctionReturn(PETSC_SUCCESS); 1500 } 1501 1502 /*@C 1503 PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call 1504 to `PetscSFBcastEnd()` 1505 1506 Collective 1507 1508 Input Parameters: 1509 + sf - star forest on which to communicate 1510 . unit - data type associated with each node 1511 . rootmtype - memory type of `rootdata` 1512 . rootdata - buffer to broadcast 1513 . leafmtype - memory type of `leafdata` 1514 - op - operation to use for reduction 1515 1516 Output Parameter: 1517 . 
leafdata - buffer to be reduced with values from each leaf's respective root 1518 1519 Level: intermediate 1520 1521 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()` 1522 @*/ 1523 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) 1524 { 1525 PetscFunctionBegin; 1526 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1527 PetscCall(PetscSFSetUp(sf)); 1528 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1529 PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 1530 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1531 PetscFunctionReturn(PETSC_SUCCESS); 1532 } 1533 1534 /*@C 1535 PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()` 1536 1537 Collective 1538 1539 Input Parameters: 1540 + sf - star forest 1541 . unit - data type 1542 . rootdata - buffer to broadcast 1543 - op - operation to use for reduction 1544 1545 Output Parameter: 1546 . leafdata - buffer to be reduced with values from each leaf's respective root 1547 1548 Level: intermediate 1549 1550 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()` 1551 @*/ 1552 PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1553 { 1554 PetscFunctionBegin; 1555 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1556 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1557 PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op); 1558 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1559 PetscFunctionReturn(PETSC_SUCCESS); 1560 } 1561 1562 /*@C 1563 PetscSFReduceBegin - begin reduction (communication) of `leafdata` into `rootdata`, to be completed with call to `PetscSFReduceEnd()` 1564 1565 Collective 1566 1567 Input Parameters: 1568 + sf - star forest 1569 . unit - data type 1570 . leafdata - values to reduce (communicate) 1571 - op - reduction operation 1572 1573 Output Parameter: 1574 . rootdata - result of reduction of values (`leafdata`) from all leaves to each root 1575 1576 Level: intermediate 1577 1578 Note: 1579 When PETSc is configured with device support, it will use `PetscGetMemType()` to determine whether the given data pointers 1580 are host pointers or device pointers, which may incur a noticeable cost. If you already knew the memory type, you should 1581 use `PetscSFReduceWithMemTypeBegin()` instead. 
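   Example:
   An illustrative sketch (not taken from an existing PETSc example) of a leaf-to-root summation with `MPIU_INT` data, assuming `sf` already has its graph set and that `nroots` and `nleaves` match that graph\:
.vb
  PetscInt *leafdata, *rootdata;

  PetscCall(PetscMalloc2(nleaves, &leafdata, nroots, &rootdata));
  PetscCall(PetscArrayzero(rootdata, nroots));
  // ... fill leafdata ...
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, leafdata, rootdata, MPI_SUM));
  // unrelated work can be overlapped with the communication here
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, leafdata, rootdata, MPI_SUM));
  PetscCall(PetscFree2(leafdata, rootdata));
.ve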
1582 1583 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()`, `MPI_Datatype` 1584 @*/ 1585 PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1586 { 1587 PetscMemType rootmtype, leafmtype; 1588 1589 PetscFunctionBegin; 1590 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1591 PetscCall(PetscSFSetUp(sf)); 1592 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1593 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1594 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1595 PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1596 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1597 PetscFunctionReturn(PETSC_SUCCESS); 1598 } 1599 1600 /*@C 1601 PetscSFReduceWithMemTypeBegin - begin reduction of `leafdata` into `rootdata` with explicit memory types, to be completed with call to `PetscSFReduceEnd()` 1602 1603 Collective 1604 1605 Input Parameters: 1606 + sf - star forest 1607 . unit - data type 1608 . leafmtype - memory type of `leafdata` 1609 . leafdata - values to reduce 1610 . rootmtype - memory type of `rootdata` 1611 - op - reduction operation 1612 1613 Output Parameter: 1614 . rootdata - result of reduction of values from all leaves of each root 1615 1616 Level: intermediate 1617 1618 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()` 1619 @*/ 1620 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) 1621 { 1622 PetscFunctionBegin; 1623 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1624 PetscCall(PetscSFSetUp(sf)); 1625 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1626 PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1627 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1628 PetscFunctionReturn(PETSC_SUCCESS); 1629 } 1630 1631 /*@C 1632 PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()` 1633 1634 Collective 1635 1636 Input Parameters: 1637 + sf - star forest 1638 . unit - data type 1639 . leafdata - values to reduce 1640 - op - reduction operation 1641 1642 Output Parameter: 1643 . 
rootdata - result of reduction of values from all leaves of each root 1644 1645 Level: intermediate 1646 1647 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()` 1648 @*/ 1649 PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1650 { 1651 PetscFunctionBegin; 1652 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1653 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1654 PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op); 1655 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1656 PetscFunctionReturn(PETSC_SUCCESS); 1657 } 1658 1659 /*@C 1660 PetscSFFetchAndOpBegin - begin operation that fetches values from `rootdata` and updates it atomically by applying operation using `leafdata`, 1661 to be completed with `PetscSFFetchAndOpEnd()` 1662 1663 Collective 1664 1665 Input Parameters: 1666 + sf - star forest 1667 . unit - data type 1668 . leafdata - leaf values to use in reduction 1669 - op - operation to use for reduction 1670 1671 Output Parameters: 1672 + rootdata - root values to be updated, input state is seen by first process to perform an update 1673 - leafupdate - state at each leaf's respective root immediately prior to atomic update 1674 1675 Level: advanced 1676 1677 Note: 1678 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1679 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1680 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1681 integers. 1682 1683 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()` 1684 @*/ 1685 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1686 { 1687 PetscMemType rootmtype, leafmtype, leafupdatemtype; 1688 1689 PetscFunctionBegin; 1690 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1691 PetscCall(PetscSFSetUp(sf)); 1692 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1693 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1694 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1695 PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype)); 1696 PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1697 PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 1698 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1699 PetscFunctionReturn(PETSC_SUCCESS); 1700 } 1701 1702 /*@C 1703 PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by 1704 applying operation using `leafdata`, to be completed with `PetscSFFetchAndOpEnd()` 1705 1706 Collective 1707 1708 Input Parameters: 1709 + sf - star forest 1710 . unit - data type 1711 . rootmtype - memory type of `rootdata` 1712 . leafmtype - memory type of `leafdata` 1713 . leafdata - leaf values to use in reduction 1714 . 
leafupdatemtype - memory type of `leafupdate` 1715 - op - operation to use for reduction 1716 1717 Output Parameters: 1718 + rootdata - root values to be updated, input state is seen by first process to perform an update 1719 - leafupdate - state at each leaf's respective root immediately prior to atomic update 1720 1721 Level: advanced 1722 1723 Note: 1724 See `PetscSFFetchAndOpBegin()` for more details. 1725 1726 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()` 1727 @*/ 1728 PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op) 1729 { 1730 PetscFunctionBegin; 1731 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1732 PetscCall(PetscSFSetUp(sf)); 1733 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1734 PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1735 PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 1736 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1737 PetscFunctionReturn(PETSC_SUCCESS); 1738 } 1739 1740 /*@C 1741 PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()` 1742 to fetch values from roots and update atomically by applying operation using `leafdata` 1743 1744 Collective 1745 1746 Input Parameters: 1747 + sf - star forest 1748 . unit - data type 1749 . leafdata - leaf values to use in reduction 1750 - op - operation to use for reduction 1751 1752 Output Parameters: 1753 + rootdata - root values to be updated, input state is seen by first process to perform an update 1754 - leafupdate - state at each leaf's respective root immediately prior to atomic update 1755 1756 Level: advanced 1757 1758 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()` 1759 @*/ 1760 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1761 { 1762 PetscFunctionBegin; 1763 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1764 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0)); 1765 PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op); 1766 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0)); 1767 PetscFunctionReturn(PETSC_SUCCESS); 1768 } 1769 1770 /*@C 1771 PetscSFComputeDegreeBegin - begin computation of the degree of each root vertex, to be completed with `PetscSFComputeDegreeEnd()` 1772 1773 Collective 1774 1775 Input Parameter: 1776 . sf - star forest 1777 1778 Output Parameter: 1779 . degree - degree (the number of leaves) of each root vertex 1780 1781 Level: advanced 1782 1783 Note: 1784 The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it. 
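   Example:
   An illustrative sketch of the begin/end pairing, assuming `sf` already has its graph set; `degree` only becomes valid after `PetscSFComputeDegreeEnd()` returns\:
.vb
  const PetscInt *degree;

  PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
  // unrelated work can be overlapped with the communication here
  PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
  // degree[i] now holds the number of leaves attached to root i; the array is owned by sf, so do not free it
.ve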
1785 1786 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()` 1787 @*/ 1788 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt *degree[]) 1789 { 1790 PetscFunctionBegin; 1791 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1792 PetscSFCheckGraphSet(sf, 1); 1793 PetscAssertPointer(degree, 2); 1794 if (!sf->degreeknown) { 1795 PetscInt i, nroots = sf->nroots, maxlocal; 1796 PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1797 maxlocal = sf->maxleaf - sf->minleaf + 1; 1798 PetscCall(PetscMalloc1(nroots, &sf->degree)); 1799 PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1800 for (i = 0; i < nroots; i++) sf->degree[i] = 0; 1801 for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1; 1802 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 1803 } 1804 *degree = NULL; 1805 PetscFunctionReturn(PETSC_SUCCESS); 1806 } 1807 1808 /*@C 1809 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()` 1810 1811 Collective 1812 1813 Input Parameter: 1814 . sf - star forest 1815 1816 Output Parameter: 1817 . degree - degree of each root vertex 1818 1819 Level: developer 1820 1821 Note: 1822 The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it. 1823 1824 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()` 1825 @*/ 1826 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt *degree[]) 1827 { 1828 PetscFunctionBegin; 1829 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1830 PetscSFCheckGraphSet(sf, 1); 1831 PetscAssertPointer(degree, 2); 1832 if (!sf->degreeknown) { 1833 PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 1834 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 1835 PetscCall(PetscFree(sf->degreetmp)); 1836 sf->degreeknown = PETSC_TRUE; 1837 } 1838 *degree = sf->degree; 1839 PetscFunctionReturn(PETSC_SUCCESS); 1840 } 1841 1842 /*@C 1843 PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`). 1844 Each multi-root is assigned index of the corresponding original root. 1845 1846 Collective 1847 1848 Input Parameters: 1849 + sf - star forest 1850 - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()` and `PetscSFComputeDegreeEnd()` 1851 1852 Output Parameters: 1853 + nMultiRoots - (optional) number of multi-roots (roots of multi-`PetscSF`) 1854 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots` 1855 1856 Level: developer 1857 1858 Note: 1859 The returned array `multiRootsOrigNumbering` should be destroyed with `PetscFree()` when no longer needed. 
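   Example:
   An illustrative sketch combining this routine with the degree computation, assuming `sf` already has its graph set (the local variable names are used only for this sketch)\:
.vb
  const PetscInt *degree;
  PetscInt        nmroots, *morig;

  PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
  PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
  PetscCall(PetscSFComputeMultiRootOriginalNumbering(sf, degree, &nmroots, &morig));
  // morig[k] is the original root index of multi-root k, for 0 <= k < nmroots
  PetscCall(PetscFree(morig));
.ve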
1860 1861 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()` 1862 @*/ 1863 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1864 { 1865 PetscSF msf; 1866 PetscInt k = 0, nroots, nmroots; 1867 1868 PetscFunctionBegin; 1869 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1870 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL)); 1871 if (nroots) PetscAssertPointer(degree, 2); 1872 if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3); 1873 PetscAssertPointer(multiRootsOrigNumbering, 4); 1874 PetscCall(PetscSFGetMultiSF(sf, &msf)); 1875 PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL)); 1876 PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering)); 1877 for (PetscInt i = 0; i < nroots; i++) { 1878 if (!degree[i]) continue; 1879 for (PetscInt j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i; 1880 } 1881 PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail"); 1882 if (nMultiRoots) *nMultiRoots = nmroots; 1883 PetscFunctionReturn(PETSC_SUCCESS); 1884 } 1885 1886 /*@C 1887 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()` 1888 1889 Collective 1890 1891 Input Parameters: 1892 + sf - star forest 1893 . unit - data type 1894 - leafdata - leaf data to gather to roots 1895 1896 Output Parameter: 1897 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree (which is the number of leaves that root has) 1898 1899 Level: intermediate 1900 1901 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()` 1902 @*/ 1903 PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) 1904 { 1905 PetscSF multi = NULL; 1906 1907 PetscFunctionBegin; 1908 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1909 PetscCall(PetscSFSetUp(sf)); 1910 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1911 PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 1912 PetscFunctionReturn(PETSC_SUCCESS); 1913 } 1914 1915 /*@C 1916 PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()` 1917 1918 Collective 1919 1920 Input Parameters: 1921 + sf - star forest 1922 . unit - data type 1923 - leafdata - leaf data to gather to roots 1924 1925 Output Parameter: 1926 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree (which is the number of leaves that root has) 1927 1928 Level: intermediate 1929 1930 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()` 1931 @*/ 1932 PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) 1933 { 1934 PetscSF multi = NULL; 1935 1936 PetscFunctionBegin; 1937 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1938 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1939 PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 1940 PetscFunctionReturn(PETSC_SUCCESS); 1941 } 1942 1943 /*@C 1944 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()` 1945 1946 Collective 1947 1948 Input Parameters: 1949 + sf - star forest 1950 . 
unit - data type 1951 - multirootdata - root buffer to send to each leaf, one unit of data is provided to each leaf thus the amount of space per root is equal to its degree (which is the number of leaves that root has) 1952 1953 Output Parameter: 1954 . leafdata - leaf data to be updated with personal data from each respective root 1955 1956 Level: intermediate 1957 1958 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()` 1959 @*/ 1960 PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) 1961 { 1962 PetscSF multi = NULL; 1963 1964 PetscFunctionBegin; 1965 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1966 PetscCall(PetscSFSetUp(sf)); 1967 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1968 PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE)); 1969 PetscFunctionReturn(PETSC_SUCCESS); 1970 } 1971 1972 /*@C 1973 PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()` 1974 1975 Collective 1976 1977 Input Parameters: 1978 + sf - star forest 1979 . unit - data type 1980 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1981 1982 Output Parameter: 1983 . leafdata - leaf data to be updated with personal data from each respective root 1984 1985 Level: intermediate 1986 1987 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()` 1988 @*/ 1989 PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) 1990 { 1991 PetscSF multi = NULL; 1992 1993 PetscFunctionBegin; 1994 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1995 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1996 PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE)); 1997 PetscFunctionReturn(PETSC_SUCCESS); 1998 } 1999 2000 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 2001 { 2002 PetscInt i, n, nleaves; 2003 const PetscInt *ilocal = NULL; 2004 PetscHSetI seen; 2005 2006 PetscFunctionBegin; 2007 if (PetscDefined(USE_DEBUG)) { 2008 PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL)); 2009 PetscCall(PetscHSetICreate(&seen)); 2010 for (i = 0; i < nleaves; i++) { 2011 const PetscInt leaf = ilocal ? ilocal[i] : i; 2012 PetscCall(PetscHSetIAdd(seen, leaf)); 2013 } 2014 PetscCall(PetscHSetIGetSize(seen, &n)); 2015 PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique"); 2016 PetscCall(PetscHSetIDestroy(&seen)); 2017 } 2018 PetscFunctionReturn(PETSC_SUCCESS); 2019 } 2020 2021 /*@ 2022 PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view 2023 2024 Input Parameters: 2025 + sfA - The first `PetscSF` 2026 - sfB - The second `PetscSF` 2027 2028 Output Parameter: 2029 . sfBA - The composite `PetscSF` 2030 2031 Level: developer 2032 2033 Notes: 2034 Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star 2035 forests, i.e. the same leaf is not connected with different roots. 2036 2037 `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds 2038 a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected 2039 nodes (roots or leaves) are not in `sfBA`.
Doing a `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a 2040 `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on `sfB`, on connected nodes. 2041 2042 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()` 2043 @*/ 2044 PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA) 2045 { 2046 const PetscSFNode *remotePointsA, *remotePointsB; 2047 PetscSFNode *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB; 2048 const PetscInt *localPointsA, *localPointsB; 2049 PetscInt *localPointsBA; 2050 PetscInt i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA; 2051 PetscBool denseB; 2052 2053 PetscFunctionBegin; 2054 PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1); 2055 PetscSFCheckGraphSet(sfA, 1); 2056 PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2); 2057 PetscSFCheckGraphSet(sfB, 2); 2058 PetscCheckSameComm(sfA, 1, sfB, 2); 2059 PetscAssertPointer(sfBA, 3); 2060 PetscCall(PetscSFCheckLeavesUnique_Private(sfA)); 2061 PetscCall(PetscSFCheckLeavesUnique_Private(sfB)); 2062 2063 PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA)); 2064 PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB)); 2065 /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size 2066 numRootsB; otherwise, garbage will be broadcast. 2067 Example (comm size = 1): 2068 sfA: 0 <- (0, 0) 2069 sfB: 100 <- (0, 0) 2070 101 <- (0, 1) 2071 Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target 2072 of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would 2073 receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on 2074 remotePointsA; if not recast, point 101 would receive a garbage value. */ 2075 PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA)); 2076 for (i = 0; i < numRootsB; i++) { 2077 reorderedRemotePointsA[i].rank = -1; 2078 reorderedRemotePointsA[i].index = -1; 2079 } 2080 for (i = 0; i < numLeavesA; i++) { 2081 PetscInt localp = localPointsA ? localPointsA[i] : i; 2082 2083 if (localp >= numRootsB) continue; 2084 reorderedRemotePointsA[localp] = remotePointsA[i]; 2085 } 2086 remotePointsA = reorderedRemotePointsA; 2087 PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf)); 2088 PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB)); 2089 for (i = 0; i < maxleaf - minleaf + 1; i++) { 2090 leafdataB[i].rank = -1; 2091 leafdataB[i].index = -1; 2092 } 2093 PetscCall(PetscSFBcastBegin(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE)); 2094 PetscCall(PetscSFBcastEnd(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE)); 2095 PetscCall(PetscFree(reorderedRemotePointsA)); 2096 2097 denseB = (PetscBool)!localPointsB; 2098 for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) { 2099 if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE; 2100 else numLeavesBA++; 2101 } 2102 if (denseB) { 2103 localPointsBA = NULL; 2104 remotePointsBA = leafdataB; 2105 } else { 2106 PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA)); 2107 PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA)); 2108 for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) { 2109 const PetscInt l = localPointsB ?
localPointsB[i] : i; 2110 2111 if (leafdataB[l - minleaf].rank == -1) continue; 2112 remotePointsBA[numLeavesBA] = leafdataB[l - minleaf]; 2113 localPointsBA[numLeavesBA] = l; 2114 numLeavesBA++; 2115 } 2116 PetscCall(PetscFree(leafdataB)); 2117 } 2118 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA)); 2119 PetscCall(PetscSFSetFromOptions(*sfBA)); 2120 PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER)); 2121 PetscFunctionReturn(PETSC_SUCCESS); 2122 } 2123 2124 /*@ 2125 PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one 2126 2127 Input Parameters: 2128 + sfA - The first `PetscSF` 2129 - sfB - The second `PetscSF` 2130 2131 Output Parameter: 2132 . sfBA - The composite `PetscSF`. 2133 2134 Level: developer 2135 2136 Notes: 2137 Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star 2138 forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the 2139 second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected. 2140 2141 `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds 2142 a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected 2143 roots are not in `sfBA`. Doing a `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()` and `PetscSFBcastEnd()` 2144 on `sfA`, then 2145 a `PetscSFReduceBegin()` and `PetscSFReduceEnd()` on `sfB`, on connected roots. 2146 2147 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()` 2148 @*/ 2149 PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA) 2150 { 2151 const PetscSFNode *remotePointsA, *remotePointsB; 2152 PetscSFNode *remotePointsBA; 2153 const PetscInt *localPointsA, *localPointsB; 2154 PetscSFNode *reorderedRemotePointsA = NULL; 2155 PetscInt i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA; 2156 MPI_Op op; 2157 #if defined(PETSC_USE_64BIT_INDICES) 2158 PetscBool iswin; 2159 #endif 2160 2161 PetscFunctionBegin; 2162 PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1); 2163 PetscSFCheckGraphSet(sfA, 1); 2164 PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2); 2165 PetscSFCheckGraphSet(sfB, 2); 2166 PetscCheckSameComm(sfA, 1, sfB, 2); 2167 PetscAssertPointer(sfBA, 3); 2168 PetscCall(PetscSFCheckLeavesUnique_Private(sfA)); 2169 PetscCall(PetscSFCheckLeavesUnique_Private(sfB)); 2170 2171 PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA)); 2172 PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB)); 2173 2174 /* TODO: Check roots of sfB have degree of 1 */ 2175 /* Once we implement it, we can replace the MPI_MAXLOC 2176 with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect. 2177 We use MPI_MAXLOC only to have a deterministic output from this routine if 2178 the root condition is not met.
2179 */ 2180 op = MPI_MAXLOC; 2181 #if defined(PETSC_USE_64BIT_INDICES) 2182 /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */ 2183 PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin)); 2184 if (iswin) op = MPI_REPLACE; 2185 #endif 2186 2187 PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf)); 2188 PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA)); 2189 for (i = 0; i < maxleaf - minleaf + 1; i++) { 2190 reorderedRemotePointsA[i].rank = -1; 2191 reorderedRemotePointsA[i].index = -1; 2192 } 2193 if (localPointsA) { 2194 for (i = 0; i < numLeavesA; i++) { 2195 if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; 2196 reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i]; 2197 } 2198 } else { 2199 for (i = 0; i < numLeavesA; i++) { 2200 if (i > maxleaf || i < minleaf) continue; 2201 reorderedRemotePointsA[i - minleaf] = remotePointsA[i]; 2202 } 2203 } 2204 2205 PetscCall(PetscMalloc1(numRootsB, &localPointsBA)); 2206 PetscCall(PetscMalloc1(numRootsB, &remotePointsBA)); 2207 for (i = 0; i < numRootsB; i++) { 2208 remotePointsBA[i].rank = -1; 2209 remotePointsBA[i].index = -1; 2210 } 2211 2212 PetscCall(PetscSFReduceBegin(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op)); 2213 PetscCall(PetscSFReduceEnd(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op)); 2214 PetscCall(PetscFree(reorderedRemotePointsA)); 2215 for (i = 0, numLeavesBA = 0; i < numRootsB; i++) { 2216 if (remotePointsBA[i].rank == -1) continue; 2217 remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank; 2218 remotePointsBA[numLeavesBA].index = remotePointsBA[i].index; 2219 localPointsBA[numLeavesBA] = i; 2220 numLeavesBA++; 2221 } 2222 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA)); 2223 PetscCall(PetscSFSetFromOptions(*sfBA)); 2224 PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER)); 2225 PetscFunctionReturn(PETSC_SUCCESS); 2226 } 2227 2228 /* 2229 PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF` 2230 2231 Input Parameter: 2232 . sf - The global `PetscSF` 2233 2234 Output Parameter: 2235 . 
out - The local `PetscSF` 2236 2237 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()` 2238 */ 2239 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out) 2240 { 2241 MPI_Comm comm; 2242 PetscMPIInt myrank; 2243 const PetscInt *ilocal; 2244 const PetscSFNode *iremote; 2245 PetscInt i, j, nroots, nleaves, lnleaves, *lilocal; 2246 PetscSFNode *liremote; 2247 PetscSF lsf; 2248 2249 PetscFunctionBegin; 2250 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2251 if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out); 2252 else { 2253 PetscMPIInt irank; 2254 2255 /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */ 2256 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 2257 PetscCallMPI(MPI_Comm_rank(comm, &myrank)); 2258 2259 /* Find out local edges and build a local SF */ 2260 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 2261 for (i = lnleaves = 0; i < nleaves; i++) { 2262 PetscCall(PetscMPIIntCast(iremote[i].rank, &irank)); 2263 if (irank == myrank) lnleaves++; 2264 } 2265 PetscCall(PetscMalloc1(lnleaves, &lilocal)); 2266 PetscCall(PetscMalloc1(lnleaves, &liremote)); 2267 2268 for (i = j = 0; i < nleaves; i++) { 2269 PetscCall(PetscMPIIntCast(iremote[i].rank, &irank)); 2270 if (irank == myrank) { 2271 lilocal[j] = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */ 2272 liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */ 2273 liremote[j].index = iremote[i].index; 2274 j++; 2275 } 2276 } 2277 PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf)); 2278 PetscCall(PetscSFSetFromOptions(lsf)); 2279 PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER)); 2280 PetscCall(PetscSFSetUp(lsf)); 2281 *out = lsf; 2282 } 2283 PetscFunctionReturn(PETSC_SUCCESS); 2284 } 2285 2286 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */ 2287 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata) 2288 { 2289 PetscMemType rootmtype, leafmtype; 2290 2291 PetscFunctionBegin; 2292 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2293 PetscCall(PetscSFSetUp(sf)); 2294 PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 2295 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 2296 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 2297 PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata); 2298 PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 2299 PetscFunctionReturn(PETSC_SUCCESS); 2300 } 2301 2302 /*@ 2303 PetscSFConcatenate - concatenate multiple `PetscSF` into a new `PetscSF` 2304 2305 Input Parameters: 2306 + comm - the communicator 2307 . nsfs - the number of input `PetscSF` 2308 . sfs - the array of input `PetscSF` 2309 . rootMode - the root mode specifying how roots are handled 2310 - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage 2311 2312 Output Parameter: 2313 . newsf - The resulting `PetscSF` 2314 2315 Level: advanced 2316 2317 Notes: 2318 The communicator of all `PetscSF`s in `sfs` must be comm. 2319 2320 Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order. 2321 2322 The offsets in `leafOffsets` are added to the original leaf indices. 2323 2324 If all input `PetscSF`s use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well. 
2325 In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`. 2326 2327 If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s. 2328 In this case, the user is responsible for providing correct offsets so that the resulting leaves are unique (otherwise an error occurs). 2329 2330 All root modes retain the essential connectivity condition. 2331 2332 If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`. 2333 2334 Parameter `rootMode` controls how the input root spaces are combined. 2335 For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode) 2336 and is also the same in the output `PetscSF`. 2337 2338 For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined. 2339 `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally; 2340 roots of sfs[0], sfs[1], sfs[2], ... are joined on each MPI process separately, ordered by input `PetscSF` and original local index, and renumbered contiguously. 2341 `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally; 2342 roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously; 2343 the original root MPI processes are ignored. 2344 For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, 2345 the output `PetscSF`'s root layout is such that the local number of roots is the sum of the local numbers of roots of the input `PetscSF`s on each MPI process 2346 to preserve load balancing. 2347 However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different MPI processes. 2348 2349 Example: 2350 We can use src/vec/is/sf/tests/ex18.c to compare the root modes.
By running 2351 .vb 2352 make -C $PETSC_DIR/src/vec/is/sf/tests ex18 2353 for m in {local,global,shared}; do 2354 mpiexec -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view 2355 done 2356 .ve 2357 we generate two identical `PetscSF`s sf_0 and sf_1, 2358 .vb 2359 PetscSF Object: sf_0 2 MPI processes 2360 type: basic 2361 rank #leaves #roots 2362 [ 0] 4 2 2363 [ 1] 4 2 2364 leaves roots roots in global numbering 2365 ( 0, 0) <- ( 0, 0) = 0 2366 ( 0, 1) <- ( 0, 1) = 1 2367 ( 0, 2) <- ( 1, 0) = 2 2368 ( 0, 3) <- ( 1, 1) = 3 2369 ( 1, 0) <- ( 0, 0) = 0 2370 ( 1, 1) <- ( 0, 1) = 1 2371 ( 1, 2) <- ( 1, 0) = 2 2372 ( 1, 3) <- ( 1, 1) = 3 2373 .ve 2374 and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\: 2375 .vb 2376 rootMode = local: 2377 PetscSF Object: result_sf 2 MPI processes 2378 type: basic 2379 rank #leaves #roots 2380 [ 0] 8 4 2381 [ 1] 8 4 2382 leaves roots roots in global numbering 2383 ( 0, 0) <- ( 0, 0) = 0 2384 ( 0, 1) <- ( 0, 1) = 1 2385 ( 0, 2) <- ( 1, 0) = 4 2386 ( 0, 3) <- ( 1, 1) = 5 2387 ( 0, 4) <- ( 0, 2) = 2 2388 ( 0, 5) <- ( 0, 3) = 3 2389 ( 0, 6) <- ( 1, 2) = 6 2390 ( 0, 7) <- ( 1, 3) = 7 2391 ( 1, 0) <- ( 0, 0) = 0 2392 ( 1, 1) <- ( 0, 1) = 1 2393 ( 1, 2) <- ( 1, 0) = 4 2394 ( 1, 3) <- ( 1, 1) = 5 2395 ( 1, 4) <- ( 0, 2) = 2 2396 ( 1, 5) <- ( 0, 3) = 3 2397 ( 1, 6) <- ( 1, 2) = 6 2398 ( 1, 7) <- ( 1, 3) = 7 2399 2400 rootMode = global: 2401 PetscSF Object: result_sf 2 MPI processes 2402 type: basic 2403 rank #leaves #roots 2404 [ 0] 8 4 2405 [ 1] 8 4 2406 leaves roots roots in global numbering 2407 ( 0, 0) <- ( 0, 0) = 0 2408 ( 0, 1) <- ( 0, 1) = 1 2409 ( 0, 2) <- ( 0, 2) = 2 2410 ( 0, 3) <- ( 0, 3) = 3 2411 ( 0, 4) <- ( 1, 0) = 4 2412 ( 0, 5) <- ( 1, 1) = 5 2413 ( 0, 6) <- ( 1, 2) = 6 2414 ( 0, 7) <- ( 1, 3) = 7 2415 ( 1, 0) <- ( 0, 0) = 0 2416 ( 1, 1) <- ( 0, 1) = 1 2417 ( 1, 2) <- ( 0, 2) = 2 2418 ( 1, 3) <- ( 0, 3) = 3 2419 ( 1, 4) <- ( 1, 0) = 4 2420 ( 1, 5) <- ( 1, 1) = 5 2421 ( 1, 6) <- ( 1, 2) = 6 2422 ( 1, 7) <- ( 1, 3) = 7 2423 2424 rootMode = shared: 2425 PetscSF Object: result_sf 2 MPI processes 2426 type: basic 2427 rank #leaves #roots 2428 [ 0] 8 2 2429 [ 1] 8 2 2430 leaves roots roots in global numbering 2431 ( 0, 0) <- ( 0, 0) = 0 2432 ( 0, 1) <- ( 0, 1) = 1 2433 ( 0, 2) <- ( 1, 0) = 2 2434 ( 0, 3) <- ( 1, 1) = 3 2435 ( 0, 4) <- ( 0, 0) = 0 2436 ( 0, 5) <- ( 0, 1) = 1 2437 ( 0, 6) <- ( 1, 0) = 2 2438 ( 0, 7) <- ( 1, 1) = 3 2439 ( 1, 0) <- ( 0, 0) = 0 2440 ( 1, 1) <- ( 0, 1) = 1 2441 ( 1, 2) <- ( 1, 0) = 2 2442 ( 1, 3) <- ( 1, 1) = 3 2443 ( 1, 4) <- ( 0, 0) = 0 2444 ( 1, 5) <- ( 0, 1) = 1 2445 ( 1, 6) <- ( 1, 0) = 2 2446 ( 1, 7) <- ( 1, 1) = 3 2447 .ve 2448 2449 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode` 2450 @*/ 2451 PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf) 2452 { 2453 PetscInt i, s, nLeaves, nRoots; 2454 PetscInt *leafArrayOffsets; 2455 PetscInt *ilocal_new; 2456 PetscSFNode *iremote_new; 2457 PetscBool all_ilocal_null = PETSC_FALSE; 2458 PetscLayout glayout = NULL; 2459 PetscInt *gremote = NULL; 2460 PetscMPIInt rank, size; 2461 2462 PetscFunctionBegin; 2463 if (PetscDefined(USE_DEBUG)) { 2464 PetscSF dummy; /* just to have a PetscObject on comm for input validation */ 2465 2466 PetscCall(PetscSFCreate(comm, &dummy)); 2467 PetscValidLogicalCollectiveInt(dummy, nsfs, 2); 
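/* validate the sfs[] array itself, then check that each entry is a PetscSF living on the same communicator as comm */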
2468 PetscAssertPointer(sfs, 3); 2469 for (i = 0; i < nsfs; i++) { 2470 PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3); 2471 PetscCheckSameComm(dummy, 1, sfs[i], 3); 2472 } 2473 PetscValidLogicalCollectiveEnum(dummy, rootMode, 4); 2474 if (leafOffsets) PetscAssertPointer(leafOffsets, 5); 2475 PetscAssertPointer(newsf, 6); 2476 PetscCall(PetscSFDestroy(&dummy)); 2477 } 2478 if (!nsfs) { 2479 PetscCall(PetscSFCreate(comm, newsf)); 2480 PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 2481 PetscFunctionReturn(PETSC_SUCCESS); 2482 } 2483 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2484 PetscCallMPI(MPI_Comm_size(comm, &size)); 2485 2486 /* Calculate leaf array offsets */ 2487 PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets)); 2488 leafArrayOffsets[0] = 0; 2489 for (s = 0; s < nsfs; s++) { 2490 PetscInt nl; 2491 2492 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL)); 2493 leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl; 2494 } 2495 nLeaves = leafArrayOffsets[nsfs]; 2496 2497 /* Calculate number of roots */ 2498 switch (rootMode) { 2499 case PETSCSF_CONCATENATE_ROOTMODE_SHARED: { 2500 PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL)); 2501 if (PetscDefined(USE_DEBUG)) { 2502 for (s = 1; s < nsfs; s++) { 2503 PetscInt nr; 2504 2505 PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL)); 2506 PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots); 2507 } 2508 } 2509 } break; 2510 case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: { 2511 /* Calculate also global layout in this case */ 2512 PetscInt *nls; 2513 PetscLayout *lts; 2514 PetscInt **inds; 2515 PetscInt j; 2516 PetscInt rootOffset = 0; 2517 2518 PetscCall(PetscCalloc3(nsfs, &lts, nsfs, &nls, nsfs, &inds)); 2519 PetscCall(PetscLayoutCreate(comm, &glayout)); 2520 glayout->bs = 1; 2521 glayout->n = 0; 2522 glayout->N = 0; 2523 for (s = 0; s < nsfs; s++) { 2524 PetscCall(PetscSFGetGraphLayout(sfs[s], &lts[s], &nls[s], NULL, &inds[s])); 2525 glayout->n += lts[s]->n; 2526 glayout->N += lts[s]->N; 2527 } 2528 PetscCall(PetscLayoutSetUp(glayout)); 2529 PetscCall(PetscMalloc1(nLeaves, &gremote)); 2530 for (s = 0, j = 0; s < nsfs; s++) { 2531 for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset; 2532 rootOffset += lts[s]->N; 2533 PetscCall(PetscLayoutDestroy(&lts[s])); 2534 PetscCall(PetscFree(inds[s])); 2535 } 2536 PetscCall(PetscFree3(lts, nls, inds)); 2537 nRoots = glayout->N; 2538 } break; 2539 case PETSCSF_CONCATENATE_ROOTMODE_LOCAL: 2540 /* nRoots calculated later in this case */ 2541 break; 2542 default: 2543 SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode); 2544 } 2545 2546 if (!leafOffsets) { 2547 all_ilocal_null = PETSC_TRUE; 2548 for (s = 0; s < nsfs; s++) { 2549 const PetscInt *ilocal; 2550 2551 PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL)); 2552 if (ilocal) { 2553 all_ilocal_null = PETSC_FALSE; 2554 break; 2555 } 2556 } 2557 PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL"); 2558 } 2559 2560 /* Renumber and concatenate local leaves */ 2561 ilocal_new = NULL; 2562 if (!all_ilocal_null) { 2563 PetscCall(PetscMalloc1(nLeaves, &ilocal_new)); 2564 for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1; 2565 for (s = 0; s < nsfs; s++) { 2566 const PetscInt
*ilocal; 2567 PetscInt *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]); 2568 PetscInt i, nleaves_l; 2569 2570 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL)); 2571 for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s]; 2572 } 2573 } 2574 2575 /* Renumber and concatenate remote roots */ 2576 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) { 2577 PetscInt rootOffset = 0; 2578 2579 PetscCall(PetscMalloc1(nLeaves, &iremote_new)); 2580 for (i = 0; i < nLeaves; i++) { 2581 iremote_new[i].rank = -1; 2582 iremote_new[i].index = -1; 2583 } 2584 for (s = 0; s < nsfs; s++) { 2585 PetscInt i, nl, nr; 2586 PetscSF tmp_sf; 2587 const PetscSFNode *iremote; 2588 PetscSFNode *tmp_rootdata; 2589 PetscSFNode *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]); 2590 2591 PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote)); 2592 PetscCall(PetscSFCreate(comm, &tmp_sf)); 2593 /* create helper SF with contiguous leaves */ 2594 PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 2595 PetscCall(PetscSFSetUp(tmp_sf)); 2596 PetscCall(PetscMalloc1(nr, &tmp_rootdata)); 2597 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) { 2598 for (i = 0; i < nr; i++) { 2599 tmp_rootdata[i].index = i + rootOffset; 2600 tmp_rootdata[i].rank = rank; 2601 } 2602 rootOffset += nr; 2603 } else { 2604 for (i = 0; i < nr; i++) { 2605 tmp_rootdata[i].index = i; 2606 tmp_rootdata[i].rank = rank; 2607 } 2608 } 2609 PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2610 PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2611 PetscCall(PetscSFDestroy(&tmp_sf)); 2612 PetscCall(PetscFree(tmp_rootdata)); 2613 } 2614 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above 2615 2616 /* Build the new SF */ 2617 PetscCall(PetscSFCreate(comm, newsf)); 2618 PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER)); 2619 } else { 2620 /* Build the new SF */ 2621 PetscCall(PetscSFCreate(comm, newsf)); 2622 PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote)); 2623 } 2624 PetscCall(PetscSFSetUp(*newsf)); 2625 PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view")); 2626 PetscCall(PetscLayoutDestroy(&glayout)); 2627 PetscCall(PetscFree(gremote)); 2628 PetscCall(PetscFree(leafArrayOffsets)); 2629 PetscFunctionReturn(PETSC_SUCCESS); 2630 } 2631 2632 /*@ 2633 PetscSFRegisterPersistent - Register root and leaf data as memory regions that will be used for repeated `PetscSF` communications. 2634 2635 Collective 2636 2637 Input Parameters: 2638 + sf - star forest 2639 . unit - the data type contained within `rootdata` and `leafdata` 2640 . rootdata - root data that will be used for multiple `PetscSF` communications 2641 - leafdata - leaf data that will be used for multiple `PetscSF` communications 2642 2643 Level: advanced 2644 2645 Notes: 2646 Implementations of `PetscSF` can make optimizations 2647 for repeated communication using the same memory regions, but these optimizations 2648 can be unsound if `rootdata` or `leafdata` is deallocated and the `PetscSF` is not informed. 
2649 The intended pattern is 2650 2651 .vb 2652 PetscMalloc2(nroots, &rootdata, nleaves, &leafdata); 2653 2654 PetscSFRegisterPersistent(sf, unit, rootdata, leafdata); 2655 // repeated use of rootdata and leafdata will now be optimized 2656 2657 PetscSFBcastBegin(sf, unit, rootdata, leafdata, MPI_REPLACE); 2658 PetscSFBcastEnd(sf, unit, rootdata, leafdata, MPI_REPLACE); 2659 // ... 2660 PetscSFReduceBegin(sf, unit, leafdata, rootdata, MPI_SUM); 2661 PetscSFReduceEnd(sf, unit, leafdata, rootdata, MPI_SUM); 2662 // ... (other communications) 2663 2664 // rootdata and leafdata must be deregistered before freeing 2665 // skipping this can lead to undefined behavior including 2666 // deadlocks 2667 PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata); 2668 2669 // it is now safe to free rootdata and leafdata 2670 PetscFree2(rootdata, leafdata); 2671 .ve 2672 2673 If you do not register `rootdata` and `leafdata` it will not cause an error, 2674 but optimizations that reduce the setup time for each communication cannot be 2675 made. Currently, the only implementation of `PetscSF` that benefits from 2676 `PetscSFRegisterPersistent()` is `PETSCSFWINDOW`. For the default 2677 `PETSCSFBASIC` there is no benefit to using `PetscSFRegisterPersistent()`. 2678 2679 .seealso: [](sec_petscsf), `PetscSF`, `PETSCSFWINDOW`, `PetscSFDeregisterPersistent()` 2680 @*/ 2681 PetscErrorCode PetscSFRegisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata) 2682 { 2683 PetscFunctionBegin; 2684 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2685 PetscTryMethod(sf, "PetscSFRegisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata)); 2686 PetscFunctionReturn(PETSC_SUCCESS); 2687 } 2688 2689 /*@ 2690 PetscSFDeregisterPersistent - Signal that repeated usage of `rootdata` and `leafdata` for `PetscSF` communication has concluded. 2691 2692 Collective 2693 2694 Input Parameters: 2695 + sf - star forest 2696 . unit - the data type contained within the `rootdata` and `leafdata` 2697 . rootdata - root data that was previously registered with `PetscSFRegisterPersistent()` 2698 - leafdata - leaf data that was previously registered with `PetscSFRegisterPersistent()` 2699 2700 Level: advanced 2701 2702 Note: 2703 See `PetscSFRegisterPersistent()` for when and how to use this function. 2704 2705 .seealso: [](sec_petscsf), `PetscSF`, `PETSCSFWINDOW`, `PetscSFRegisterPersistent()` 2706 @*/ 2707 PetscErrorCode PetscSFDeregisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata) 2708 { 2709 PetscFunctionBegin; 2710 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2711 PetscTryMethod(sf, "PetscSFDeregisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata)); 2712 PetscFunctionReturn(PETSC_SUCCESS); 2713 } 2714 2715 PETSC_INTERN PetscErrorCode PetscSFGetDatatypeSize_Internal(MPI_Comm comm, MPI_Datatype unit, MPI_Aint *size) 2716 { 2717 MPI_Aint lb, lb_true, bytes, bytes_true; 2718 2719 PetscFunctionBegin; 2720 PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes)); 2721 PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true)); 2722 PetscCheck(lb == 0 && lb_true == 0, comm, PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature"); 2723 *size = bytes; 2724 PetscFunctionReturn(PETSC_SUCCESS); 2725 } 2726