1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/hashseti.h> 3 #include <petsc/private/viewerimpl.h> 4 #include <petsc/private/hashmapi.h> 5 6 #if defined(PETSC_HAVE_CUDA) 7 #include <cuda_runtime.h> 8 #include <petscdevice_cuda.h> 9 #endif 10 11 #if defined(PETSC_HAVE_HIP) 12 #include <hip/hip_runtime.h> 13 #endif 14 15 #if defined(PETSC_CLANG_STATIC_ANALYZER) 16 extern void PetscSFCheckGraphSet(PetscSF, int); 17 #else 18 #if defined(PETSC_USE_DEBUG) 19 #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME) 20 #else 21 #define PetscSFCheckGraphSet(sf, arg) \ 22 do { \ 23 } while (0) 24 #endif 25 #endif 26 27 const char *const PetscSFDuplicateOptions[] = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL}; 28 const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL}; 29 30 /*@ 31 PetscSFCreate - create a star forest communication context 32 33 Collective 34 35 Input Parameter: 36 . comm - communicator on which the star forest will operate 37 38 Output Parameter: 39 . sf - new star forest context 40 41 Options Database Key: 42 + -sf_type basic - Use MPI persistent Isend/Irecv for communication (Default) 43 . -sf_type window - Use MPI-3 one-sided window for communication 44 . -sf_type neighbor - Use MPI-3 neighborhood collectives for communication 45 - -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor) 46 47 Level: intermediate 48 49 Note: 50 When one knows the communication graph is one of the predefined graph, such as `MPI_Alltoall()`, `MPI_Allgatherv()`, 51 `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special 52 `SF`s are optimized and they have better performance than the general `SF`s. 53 54 .seealso: `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()` 55 @*/ 56 PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf) 57 { 58 PetscSF b; 59 60 PetscFunctionBegin; 61 PetscAssertPointer(sf, 2); 62 PetscCall(PetscSFInitializePackage()); 63 64 PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView)); 65 b->nroots = -1; 66 b->nleaves = -1; 67 b->minleaf = PETSC_INT_MAX; 68 b->maxleaf = PETSC_INT_MIN; 69 b->nranks = -1; 70 b->rankorder = PETSC_TRUE; 71 b->ingroup = MPI_GROUP_NULL; 72 b->outgroup = MPI_GROUP_NULL; 73 b->graphset = PETSC_FALSE; 74 #if defined(PETSC_HAVE_DEVICE) 75 b->use_gpu_aware_mpi = use_gpu_aware_mpi; 76 b->use_stream_aware_mpi = PETSC_FALSE; 77 b->unknown_input_stream = PETSC_FALSE; 78 #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 79 b->backend = PETSCSF_BACKEND_KOKKOS; 80 #elif defined(PETSC_HAVE_CUDA) 81 b->backend = PETSCSF_BACKEND_CUDA; 82 #elif defined(PETSC_HAVE_HIP) 83 b->backend = PETSCSF_BACKEND_HIP; 84 #endif 85 86 #if defined(PETSC_HAVE_NVSHMEM) 87 b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */ 88 b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */ 89 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL)); 90 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL)); 91 #endif 92 #endif 93 b->vscat.from_n = -1; 94 b->vscat.to_n = -1; 95 b->vscat.unit = MPIU_SCALAR; 96 *sf = b; 97 PetscFunctionReturn(PETSC_SUCCESS); 98 } 99 100 /*@ 101 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 102 103 Collective 104 105 Input Parameter: 106 . sf - star forest 107 108 Level: advanced 109 110 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()` 111 @*/ 112 PetscErrorCode PetscSFReset(PetscSF sf) 113 { 114 PetscFunctionBegin; 115 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 116 PetscTryTypeMethod(sf, Reset); 117 PetscCall(PetscSFDestroy(&sf->rankssf)); 118 119 sf->nroots = -1; 120 sf->nleaves = -1; 121 sf->minleaf = PETSC_INT_MAX; 122 sf->maxleaf = PETSC_INT_MIN; 123 sf->mine = NULL; 124 sf->remote = NULL; 125 sf->graphset = PETSC_FALSE; 126 PetscCall(PetscFree(sf->mine_alloc)); 127 PetscCall(PetscFree(sf->remote_alloc)); 128 sf->nranks = -1; 129 PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote)); 130 sf->degreeknown = PETSC_FALSE; 131 PetscCall(PetscFree(sf->degree)); 132 if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup)); 133 if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup)); 134 135 if (sf->multi) sf->multi->multi = NULL; 136 PetscCall(PetscSFDestroy(&sf->multi)); 137 138 PetscCall(PetscLayoutDestroy(&sf->map)); 139 140 #if defined(PETSC_HAVE_DEVICE) 141 for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i])); 142 #endif 143 144 sf->setupcalled = PETSC_FALSE; 145 PetscFunctionReturn(PETSC_SUCCESS); 146 } 147 148 /*@ 149 PetscSFSetType - Set the `PetscSF` communication implementation 150 151 Collective 152 153 Input Parameters: 154 + sf - the `PetscSF` context 155 - type - a known method 156 .vb 157 PETSCSFWINDOW - MPI-2/3 one-sided 158 PETSCSFBASIC - basic implementation using MPI-1 two-sided 159 .ve 160 161 Options Database Key: 162 . -sf_type <type> - Sets the method; for example `basic` or `window` use -help for a list of available methods 163 164 Level: intermediate 165 166 Notes: 167 See `PetscSFType` for possible values 168 169 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()` 170 @*/ 171 PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type) 172 { 173 PetscBool match; 174 PetscErrorCode (*r)(PetscSF); 175 176 PetscFunctionBegin; 177 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 178 PetscAssertPointer(type, 2); 179 180 PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match)); 181 if (match) PetscFunctionReturn(PETSC_SUCCESS); 182 183 PetscCall(PetscFunctionListFind(PetscSFList, type, &r)); 184 PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type); 185 /* Destroy the previous PetscSF implementation context */ 186 PetscTryTypeMethod(sf, Destroy); 187 PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops))); 188 PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type)); 189 PetscCall((*r)(sf)); 190 PetscFunctionReturn(PETSC_SUCCESS); 191 } 192 193 /*@ 194 PetscSFGetType - Get the `PetscSF` communication implementation 195 196 Not Collective 197 198 Input Parameter: 199 . sf - the `PetscSF` context 200 201 Output Parameter: 202 . type - the `PetscSF` type name 203 204 Level: intermediate 205 206 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()` 207 @*/ 208 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 209 { 210 PetscFunctionBegin; 211 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 212 PetscAssertPointer(type, 2); 213 *type = ((PetscObject)sf)->type_name; 214 PetscFunctionReturn(PETSC_SUCCESS); 215 } 216 217 /*@ 218 PetscSFDestroy - destroy a star forest 219 220 Collective 221 222 Input Parameter: 223 . sf - address of star forest 224 225 Level: intermediate 226 227 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()` 228 @*/ 229 PetscErrorCode PetscSFDestroy(PetscSF *sf) 230 { 231 PetscFunctionBegin; 232 if (!*sf) PetscFunctionReturn(PETSC_SUCCESS); 233 PetscValidHeaderSpecific(*sf, PETSCSF_CLASSID, 1); 234 if (--((PetscObject)*sf)->refct > 0) { 235 *sf = NULL; 236 PetscFunctionReturn(PETSC_SUCCESS); 237 } 238 PetscCall(PetscSFReset(*sf)); 239 PetscTryTypeMethod(*sf, Destroy); 240 PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf)); 241 if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit)); 242 #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM) 243 if ((*sf)->use_stream_aware_mpi) { 244 PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream)); 245 PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm)); 246 } 247 #endif 248 PetscCall(PetscHeaderDestroy(sf)); 249 PetscFunctionReturn(PETSC_SUCCESS); 250 } 251 252 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 253 { 254 PetscInt i, nleaves; 255 PetscMPIInt size; 256 const PetscInt *ilocal; 257 const PetscSFNode *iremote; 258 259 PetscFunctionBegin; 260 if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS); 261 PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote)); 262 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 263 for (i = 0; i < nleaves; i++) { 264 const PetscInt rank = iremote[i].rank; 265 const PetscInt remote = iremote[i].index; 266 const PetscInt leaf = ilocal ? ilocal[i] : i; 267 PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size); 268 PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i); 269 PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i); 270 } 271 PetscFunctionReturn(PETSC_SUCCESS); 272 } 273 274 /*@ 275 PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication 276 277 Collective 278 279 Input Parameter: 280 . sf - star forest communication object 281 282 Level: beginner 283 284 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()` 285 @*/ 286 PetscErrorCode PetscSFSetUp(PetscSF sf) 287 { 288 PetscFunctionBegin; 289 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 290 PetscSFCheckGraphSet(sf, 1); 291 if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS); 292 PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0)); 293 PetscCall(PetscSFCheckGraphValid_Private(sf)); 294 if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */ 295 PetscTryTypeMethod(sf, SetUp); 296 #if defined(PETSC_HAVE_CUDA) 297 if (sf->backend == PETSCSF_BACKEND_CUDA) { 298 sf->ops->Malloc = PetscSFMalloc_CUDA; 299 sf->ops->Free = PetscSFFree_CUDA; 300 } 301 #endif 302 #if defined(PETSC_HAVE_HIP) 303 if (sf->backend == PETSCSF_BACKEND_HIP) { 304 sf->ops->Malloc = PetscSFMalloc_HIP; 305 sf->ops->Free = PetscSFFree_HIP; 306 } 307 #endif 308 309 #if defined(PETSC_HAVE_KOKKOS) 310 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 311 sf->ops->Malloc = PetscSFMalloc_Kokkos; 312 sf->ops->Free = PetscSFFree_Kokkos; 313 } 314 #endif 315 PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0)); 316 sf->setupcalled = PETSC_TRUE; 317 PetscFunctionReturn(PETSC_SUCCESS); 318 } 319 320 /*@ 321 PetscSFSetFromOptions - set `PetscSF` options using the options database 322 323 Logically Collective 324 325 Input Parameter: 326 . sf - star forest 327 328 Options Database Keys: 329 + -sf_type - implementation type, see `PetscSFSetType()` 330 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 331 . -sf_use_default_stream - Assume callers of `PetscSF` computed the input root/leafdata with the default CUDA stream. `PetscSF` will also 332 use the default stream to process data. Therefore, no stream synchronization is needed between `PetscSF` and its caller (default: true). 333 If true, this option only works with `-use_gpu_aware_mpi 1`. 334 . -sf_use_stream_aware_mpi - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false). 335 If true, this option only works with `-use_gpu_aware_mpi 1`. 336 337 - -sf_backend <cuda,hip,kokkos> - Select the device backend`PetscSF` uses. Currently `PetscSF` has these backends: cuda - hip and Kokkos. 338 On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices, 339 the only available is kokkos. 340 341 Level: intermediate 342 343 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()` 344 @*/ 345 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 346 { 347 PetscSFType deft; 348 char type[256]; 349 PetscBool flg; 350 351 PetscFunctionBegin; 352 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 353 PetscObjectOptionsBegin((PetscObject)sf); 354 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 355 PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg)); 356 PetscCall(PetscSFSetType(sf, flg ? type : deft)); 357 PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL)); 358 PetscCall(PetscOptionsBool("-sf_monitor", "monitor the MPI communication in sf", NULL, sf->monitor, &sf->monitor, NULL)); 359 #if defined(PETSC_HAVE_DEVICE) 360 { 361 char backendstr[32] = {0}; 362 PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set; 363 /* Change the defaults set in PetscSFCreate() with command line options */ 364 PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL)); 365 PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL)); 366 PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set)); 367 PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda)); 368 PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos)); 369 PetscCall(PetscStrcasecmp("hip", backendstr, &isHip)); 370 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 371 if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 372 else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 373 else if (isHip) sf->backend = PETSCSF_BACKEND_HIP; 374 else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr); 375 #elif defined(PETSC_HAVE_KOKKOS) 376 PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr); 377 #endif 378 379 #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM) 380 if (sf->use_stream_aware_mpi) { 381 MPI_Info info; 382 383 PetscCallMPI(MPI_Info_create(&info)); 384 PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t")); 385 PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream))); 386 PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream)); 387 PetscCallMPI(MPI_Info_free(&info)); 388 PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm)); 389 } 390 #endif 391 } 392 #endif 393 PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject); 394 PetscOptionsEnd(); 395 PetscFunctionReturn(PETSC_SUCCESS); 396 } 397 398 /*@ 399 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 400 401 Logically Collective 402 403 Input Parameters: 404 + sf - star forest 405 - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic) 406 407 Level: advanced 408 409 .seealso: `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()` 410 @*/ 411 PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg) 412 { 413 PetscFunctionBegin; 414 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 415 PetscValidLogicalCollectiveBool(sf, flg, 2); 416 PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 417 sf->rankorder = flg; 418 PetscFunctionReturn(PETSC_SUCCESS); 419 } 420 421 /*@ 422 PetscSFSetGraph - Set a parallel star forest 423 424 Collective 425 426 Input Parameters: 427 + sf - star forest 428 . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 429 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 430 . ilocal - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage (locations must be >= 0, enforced 431 during setup in debug mode) 432 . localmode - copy mode for `ilocal` 433 . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 434 during setup in debug mode) 435 - remotemode - copy mode for `iremote` 436 437 Level: intermediate 438 439 Notes: 440 Leaf indices in `ilocal` must be unique, otherwise an error occurs. 441 442 Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics. 443 In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`, 444 PETSc might modify the respective array; 445 if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`. 446 Only if `PETSC_COPY_VALUES` is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed). 447 448 Fortran Notes: 449 In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`. 450 451 Developer Notes: 452 We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf. 453 This also allows to compare leaf sets of two `PetscSF`s easily. 454 455 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()` 456 @*/ 457 PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode) 458 { 459 PetscBool unique, contiguous; 460 461 PetscFunctionBegin; 462 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 463 if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4); 464 if (nleaves > 0) PetscAssertPointer(iremote, 6); 465 PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots); 466 PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves); 467 /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast 468 * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */ 469 PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode); 470 PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode); 471 472 if (sf->nroots >= 0) { /* Reset only if graph already set */ 473 PetscCall(PetscSFReset(sf)); 474 } 475 476 PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0)); 477 if (PetscDefined(USE_DEBUG)) { 478 PetscMPIInt size; 479 480 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 481 for (PetscInt i = 0; i < nleaves; i++) { PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values"); } 482 } 483 484 sf->nroots = nroots; 485 sf->nleaves = nleaves; 486 487 if (localmode == PETSC_COPY_VALUES && ilocal) { 488 PetscInt *tlocal = NULL; 489 490 PetscCall(PetscMalloc1(nleaves, &tlocal)); 491 PetscCall(PetscArraycpy(tlocal, ilocal, nleaves)); 492 ilocal = tlocal; 493 } 494 if (remotemode == PETSC_COPY_VALUES) { 495 PetscSFNode *tremote = NULL; 496 497 PetscCall(PetscMalloc1(nleaves, &tremote)); 498 PetscCall(PetscArraycpy(tremote, iremote, nleaves)); 499 iremote = tremote; 500 } 501 502 if (nleaves && ilocal) { 503 PetscSFNode work; 504 505 PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work)); 506 PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique)); 507 unique = PetscNot(unique); 508 PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF"); 509 sf->minleaf = ilocal[0]; 510 sf->maxleaf = ilocal[nleaves - 1]; 511 contiguous = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1); 512 } else { 513 sf->minleaf = 0; 514 sf->maxleaf = nleaves - 1; 515 unique = PETSC_TRUE; 516 contiguous = PETSC_TRUE; 517 } 518 519 if (contiguous) { 520 if (localmode == PETSC_USE_POINTER) { 521 ilocal = NULL; 522 } else { 523 PetscCall(PetscFree(ilocal)); 524 } 525 } 526 sf->mine = ilocal; 527 if (localmode == PETSC_USE_POINTER) { 528 sf->mine_alloc = NULL; 529 } else { 530 sf->mine_alloc = ilocal; 531 } 532 if (PetscDefined(USE_DEBUG)) { 533 PetscMPIInt size; 534 535 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 536 for (PetscInt i = 0; i < nleaves; i++) { PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values"); } 537 } 538 sf->remote = iremote; 539 if (remotemode == PETSC_USE_POINTER) { 540 sf->remote_alloc = NULL; 541 } else { 542 sf->remote_alloc = iremote; 543 } 544 PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0)); 545 sf->graphset = PETSC_TRUE; 546 PetscFunctionReturn(PETSC_SUCCESS); 547 } 548 549 /*@ 550 PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern 551 552 Collective 553 554 Input Parameters: 555 + sf - The `PetscSF` 556 . map - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`) 557 - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL` 558 559 Level: intermediate 560 561 Notes: 562 It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `x` and its `PetscLayout` is `map`. 563 `n` and `N` are the local and global sizes of `x` respectively. 564 565 With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to 566 sequential vectors `y` on all MPI processes. 567 568 With `PETSCSF_PATTERN_GATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to a 569 sequential vector `y` on rank 0. 570 571 In above cases, entries of `x` are roots and entries of `y` are leaves. 572 573 With `PETSCSF_PATTERN_ALLTOALL`, map is insignificant. Suppose NP is size of `sf`'s communicator. The routine 574 creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 575 of rank j. Here 0 <=i,j<NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that it does 576 not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data 577 items with `MPI_Type_contiguous` and use that as the <unit> argument in SF routines. 578 579 In this case, roots and leaves are symmetric. 580 581 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()` 582 @*/ 583 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern) 584 { 585 MPI_Comm comm; 586 PetscInt n, N, res[2]; 587 PetscMPIInt rank, size; 588 PetscSFType type; 589 590 PetscFunctionBegin; 591 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 592 if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2); 593 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 594 PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern); 595 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 596 PetscCallMPI(MPI_Comm_size(comm, &size)); 597 598 if (pattern == PETSCSF_PATTERN_ALLTOALL) { 599 PetscInt sizei = size; 600 601 type = PETSCSFALLTOALL; 602 PetscCall(PetscLayoutCreate(comm, &sf->map)); 603 PetscCall(PetscLayoutSetLocalSize(sf->map, size)); 604 PetscCall(PetscLayoutSetSize(sf->map, PetscSqr(sizei))); 605 PetscCall(PetscLayoutSetUp(sf->map)); 606 } else { 607 PetscCall(PetscLayoutGetLocalSize(map, &n)); 608 PetscCall(PetscLayoutGetSize(map, &N)); 609 res[0] = n; 610 res[1] = -n; 611 /* Check if n are same over all ranks so that we can optimize it */ 612 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm)); 613 if (res[0] == -res[1]) { /* same n */ 614 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 615 } else { 616 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 617 } 618 PetscCall(PetscLayoutReference(map, &sf->map)); 619 } 620 PetscCall(PetscSFSetType(sf, type)); 621 622 sf->pattern = pattern; 623 sf->mine = NULL; /* Contiguous */ 624 625 /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 626 Also set other easy stuff. 627 */ 628 if (pattern == PETSCSF_PATTERN_ALLGATHER) { 629 sf->nleaves = N; 630 sf->nroots = n; 631 sf->nranks = size; 632 sf->minleaf = 0; 633 sf->maxleaf = N - 1; 634 } else if (pattern == PETSCSF_PATTERN_GATHER) { 635 sf->nleaves = rank ? 0 : N; 636 sf->nroots = n; 637 sf->nranks = rank ? 0 : size; 638 sf->minleaf = 0; 639 sf->maxleaf = rank ? -1 : N - 1; 640 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 641 sf->nleaves = size; 642 sf->nroots = size; 643 sf->nranks = size; 644 sf->minleaf = 0; 645 sf->maxleaf = size - 1; 646 } 647 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 648 sf->graphset = PETSC_TRUE; 649 PetscFunctionReturn(PETSC_SUCCESS); 650 } 651 652 /*@ 653 PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map 654 655 Collective 656 657 Input Parameter: 658 . sf - star forest to invert 659 660 Output Parameter: 661 . isf - inverse of `sf` 662 663 Level: advanced 664 665 Notes: 666 All roots must have degree 1. 667 668 The local space may be a permutation, but cannot be sparse. 669 670 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetGraph()` 671 @*/ 672 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf) 673 { 674 PetscMPIInt rank; 675 PetscInt i, nroots, nleaves, maxlocal, count, *newilocal; 676 const PetscInt *ilocal; 677 PetscSFNode *roots, *leaves; 678 679 PetscFunctionBegin; 680 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 681 PetscSFCheckGraphSet(sf, 1); 682 PetscAssertPointer(isf, 2); 683 684 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL)); 685 maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */ 686 687 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 688 PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves)); 689 for (i = 0; i < maxlocal; i++) { 690 leaves[i].rank = rank; 691 leaves[i].index = i; 692 } 693 for (i = 0; i < nroots; i++) { 694 roots[i].rank = -1; 695 roots[i].index = -1; 696 } 697 PetscCall(PetscSFReduceBegin(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE)); 698 PetscCall(PetscSFReduceEnd(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE)); 699 700 /* Check whether our leaves are sparse */ 701 for (i = 0, count = 0; i < nroots; i++) 702 if (roots[i].rank >= 0) count++; 703 if (count == nroots) newilocal = NULL; 704 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal)); 705 for (i = 0, count = 0; i < nroots; i++) { 706 if (roots[i].rank >= 0) { 707 newilocal[count] = i; 708 roots[count].rank = roots[i].rank; 709 roots[count].index = roots[i].index; 710 count++; 711 } 712 } 713 } 714 715 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf)); 716 PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES)); 717 PetscCall(PetscFree2(roots, leaves)); 718 PetscFunctionReturn(PETSC_SUCCESS); 719 } 720 721 /*@ 722 PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph 723 724 Collective 725 726 Input Parameters: 727 + sf - communication object to duplicate 728 - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`) 729 730 Output Parameter: 731 . newsf - new communication object 732 733 Level: beginner 734 735 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()` 736 @*/ 737 PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf) 738 { 739 PetscSFType type; 740 MPI_Datatype dtype = MPIU_SCALAR; 741 742 PetscFunctionBegin; 743 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 744 PetscValidLogicalCollectiveEnum(sf, opt, 2); 745 PetscAssertPointer(newsf, 3); 746 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf)); 747 PetscCall(PetscSFGetType(sf, &type)); 748 if (type) PetscCall(PetscSFSetType(*newsf, type)); 749 (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */ 750 if (opt == PETSCSF_DUPLICATE_GRAPH) { 751 PetscSFCheckGraphSet(sf, 1); 752 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 753 PetscInt nroots, nleaves; 754 const PetscInt *ilocal; 755 const PetscSFNode *iremote; 756 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 757 PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 758 } else { 759 PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern)); 760 } 761 } 762 /* Since oldtype is committed, so is newtype, according to MPI */ 763 if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype)); 764 (*newsf)->vscat.bs = sf->vscat.bs; 765 (*newsf)->vscat.unit = dtype; 766 (*newsf)->vscat.to_n = sf->vscat.to_n; 767 (*newsf)->vscat.from_n = sf->vscat.from_n; 768 /* Do not copy lsf. Build it on demand since it is rarely used */ 769 770 #if defined(PETSC_HAVE_DEVICE) 771 (*newsf)->backend = sf->backend; 772 (*newsf)->unknown_input_stream = sf->unknown_input_stream; 773 (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi; 774 (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi; 775 #endif 776 PetscTryTypeMethod(sf, Duplicate, opt, *newsf); 777 /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */ 778 PetscFunctionReturn(PETSC_SUCCESS); 779 } 780 781 /*@C 782 PetscSFGetGraph - Get the graph specifying a parallel star forest 783 784 Not Collective 785 786 Input Parameter: 787 . sf - star forest 788 789 Output Parameters: 790 + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 791 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 792 . ilocal - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage) 793 - iremote - remote locations of root vertices for each leaf on the current process 794 795 Level: intermediate 796 797 Notes: 798 We are not currently requiring that the graph is set, thus returning `nroots` = -1 if it has not been set yet 799 800 The returned `ilocal` and `iremote` might contain values in different order than the input ones in `PetscSFSetGraph()` 801 802 Fortran Notes: 803 The returned `iremote` array is a copy and must be deallocated after use. Consequently, if you 804 want to update the graph, you must call `PetscSFSetGraph()` after modifying the `iremote` array. 805 806 To check for a `NULL` `ilocal` use 807 $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then 808 809 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()` 810 @*/ 811 PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote) 812 { 813 PetscFunctionBegin; 814 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 815 if (sf->ops->GetGraph) { 816 PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote)); 817 } else { 818 if (nroots) *nroots = sf->nroots; 819 if (nleaves) *nleaves = sf->nleaves; 820 if (ilocal) *ilocal = sf->mine; 821 if (iremote) *iremote = sf->remote; 822 } 823 PetscFunctionReturn(PETSC_SUCCESS); 824 } 825 826 /*@ 827 PetscSFGetLeafRange - Get the active leaf ranges 828 829 Not Collective 830 831 Input Parameter: 832 . sf - star forest 833 834 Output Parameters: 835 + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves. 836 - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves. 837 838 Level: developer 839 840 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 841 @*/ 842 PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf) 843 { 844 PetscFunctionBegin; 845 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 846 PetscSFCheckGraphSet(sf, 1); 847 if (minleaf) *minleaf = sf->minleaf; 848 if (maxleaf) *maxleaf = sf->maxleaf; 849 PetscFunctionReturn(PETSC_SUCCESS); 850 } 851 852 /*@ 853 PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database 854 855 Collective 856 857 Input Parameters: 858 + A - the star forest 859 . obj - Optional object that provides the prefix for the option names 860 - name - command line option 861 862 Level: intermediate 863 864 Note: 865 See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat` 866 867 .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()` 868 @*/ 869 PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[]) 870 { 871 PetscFunctionBegin; 872 PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1); 873 PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name)); 874 PetscFunctionReturn(PETSC_SUCCESS); 875 } 876 877 /*@ 878 PetscSFView - view a star forest 879 880 Collective 881 882 Input Parameters: 883 + sf - star forest 884 - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD` 885 886 Level: beginner 887 888 .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()` 889 @*/ 890 PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer) 891 { 892 PetscBool iascii; 893 PetscViewerFormat format; 894 895 PetscFunctionBegin; 896 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 897 if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer)); 898 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 899 PetscCheckSameComm(sf, 1, viewer, 2); 900 if (sf->graphset) PetscCall(PetscSFSetUp(sf)); 901 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 902 if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) { 903 PetscMPIInt rank; 904 PetscInt j; 905 906 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer)); 907 PetscCall(PetscViewerASCIIPushTab(viewer)); 908 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 909 if (!sf->graphset) { 910 PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n")); 911 PetscCall(PetscViewerASCIIPopTab(viewer)); 912 PetscFunctionReturn(PETSC_SUCCESS); 913 } 914 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 915 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 916 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%d\n", rank, sf->nroots, sf->nleaves, sf->nranks)); 917 for (PetscInt i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index)); 918 PetscCall(PetscViewerFlush(viewer)); 919 PetscCall(PetscViewerGetFormat(viewer, &format)); 920 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 921 PetscMPIInt *tmpranks, *perm; 922 923 PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm)); 924 PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks)); 925 for (PetscMPIInt i = 0; i < sf->nranks; i++) perm[i] = i; 926 PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm)); 927 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank)); 928 for (PetscMPIInt ii = 0; ii < sf->nranks; ii++) { 929 PetscMPIInt i = perm[ii]; 930 931 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i])); 932 for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j])); 933 } 934 PetscCall(PetscFree2(tmpranks, perm)); 935 } 936 PetscCall(PetscViewerFlush(viewer)); 937 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 938 } 939 PetscCall(PetscViewerASCIIPopTab(viewer)); 940 } 941 PetscTryTypeMethod(sf, View, viewer); 942 PetscFunctionReturn(PETSC_SUCCESS); 943 } 944 945 /*@C 946 PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process 947 948 Not Collective 949 950 Input Parameter: 951 . sf - star forest 952 953 Output Parameters: 954 + nranks - number of ranks referenced by local part 955 . ranks - [`nranks`] array of ranks 956 . roffset - [`nranks`+1] offset in `rmine`/`rremote` for each rank 957 . rmine - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote rank, or `NULL` 958 - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote rank, or `NULL` 959 960 Level: developer 961 962 .seealso: `PetscSF`, `PetscSFGetLeafRanks()` 963 @*/ 964 PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscMPIInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote) 965 { 966 PetscFunctionBegin; 967 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 968 PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 969 if (sf->ops->GetRootRanks) { 970 PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote); 971 } else { 972 /* The generic implementation */ 973 if (nranks) *nranks = sf->nranks; 974 if (ranks) *ranks = sf->ranks; 975 if (roffset) *roffset = sf->roffset; 976 if (rmine) *rmine = sf->rmine; 977 if (rremote) *rremote = sf->rremote; 978 } 979 PetscFunctionReturn(PETSC_SUCCESS); 980 } 981 982 /*@C 983 PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 984 985 Not Collective 986 987 Input Parameter: 988 . sf - star forest 989 990 Output Parameters: 991 + niranks - number of leaf ranks referencing roots on this process 992 . iranks - [`niranks`] array of ranks 993 . ioffset - [`niranks`+1] offset in `irootloc` for each rank 994 - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf rank 995 996 Level: developer 997 998 .seealso: `PetscSF`, `PetscSFGetRootRanks()` 999 @*/ 1000 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscMPIInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc) 1001 { 1002 PetscFunctionBegin; 1003 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1004 PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 1005 if (sf->ops->GetLeafRanks) { 1006 PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc); 1007 } else { 1008 PetscSFType type; 1009 PetscCall(PetscSFGetType(sf, &type)); 1010 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 1011 } 1012 PetscFunctionReturn(PETSC_SUCCESS); 1013 } 1014 1015 static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list) 1016 { 1017 PetscInt i; 1018 for (i = 0; i < n; i++) { 1019 if (needle == list[i]) return PETSC_TRUE; 1020 } 1021 return PETSC_FALSE; 1022 } 1023 1024 /*@C 1025 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations. 1026 1027 Collective 1028 1029 Input Parameters: 1030 + sf - `PetscSF` to set up; `PetscSFSetGraph()` must have been called 1031 - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange) 1032 1033 Level: developer 1034 1035 .seealso: `PetscSF`, `PetscSFGetRootRanks()` 1036 @*/ 1037 PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup) 1038 { 1039 PetscHMapI table; 1040 PetscHashIter pos; 1041 PetscMPIInt size, groupsize, *groupranks, *ranks; 1042 PetscInt *rcount; 1043 PetscInt irank, sfnrank, ranksi; 1044 PetscMPIInt i, orank = -1; 1045 1046 PetscFunctionBegin; 1047 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1048 PetscSFCheckGraphSet(sf, 1); 1049 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 1050 PetscCall(PetscHMapICreateWithSize(10, &table)); 1051 for (i = 0; i < sf->nleaves; i++) { 1052 /* Log 1-based rank */ 1053 PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES)); 1054 } 1055 PetscCall(PetscHMapIGetSize(table, &sfnrank)); 1056 PetscCall(PetscMPIIntCast(sfnrank, &sf->nranks)); 1057 PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote)); 1058 PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks)); 1059 PetscHashIterBegin(table, pos); 1060 for (i = 0; i < sf->nranks; i++) { 1061 PetscHashIterGetKey(table, pos, ranksi); 1062 PetscCall(PetscMPIIntCast(ranksi, &ranks[i])); 1063 PetscHashIterGetVal(table, pos, rcount[i]); 1064 PetscHashIterNext(table, pos); 1065 ranks[i]--; /* Convert back to 0-based */ 1066 } 1067 PetscCall(PetscHMapIDestroy(&table)); 1068 1069 /* We expect that dgroup is reliably "small" while nranks could be large */ 1070 { 1071 MPI_Group group = MPI_GROUP_NULL; 1072 PetscMPIInt *dgroupranks; 1073 1074 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 1075 PetscCallMPI(MPI_Group_size(dgroup, &groupsize)); 1076 PetscCall(PetscMalloc1(groupsize, &dgroupranks)); 1077 PetscCall(PetscMalloc1(groupsize, &groupranks)); 1078 for (i = 0; i < groupsize; i++) dgroupranks[i] = i; 1079 if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks)); 1080 PetscCallMPI(MPI_Group_free(&group)); 1081 PetscCall(PetscFree(dgroupranks)); 1082 } 1083 1084 /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */ 1085 for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) { 1086 for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */ 1087 if (InList(ranks[i], groupsize, groupranks)) break; 1088 } 1089 for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */ 1090 if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break; 1091 } 1092 if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */ 1093 PetscMPIInt tmprank; 1094 PetscInt tmpcount; 1095 1096 tmprank = ranks[i]; 1097 tmpcount = rcount[i]; 1098 ranks[i] = ranks[sf->ndranks]; 1099 rcount[i] = rcount[sf->ndranks]; 1100 ranks[sf->ndranks] = tmprank; 1101 rcount[sf->ndranks] = tmpcount; 1102 sf->ndranks++; 1103 } 1104 } 1105 PetscCall(PetscFree(groupranks)); 1106 PetscCall(PetscSortMPIIntWithIntArray(sf->ndranks, ranks, rcount)); 1107 if (rcount) PetscCall(PetscSortMPIIntWithIntArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks)); 1108 sf->roffset[0] = 0; 1109 for (i = 0; i < sf->nranks; i++) { 1110 PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i)); 1111 sf->roffset[i + 1] = sf->roffset[i] + rcount[i]; 1112 rcount[i] = 0; 1113 } 1114 for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) { 1115 /* short circuit */ 1116 if (orank != sf->remote[i].rank) { 1117 /* Search for index of iremote[i].rank in sf->ranks */ 1118 PetscCall(PetscMPIIntCast(sf->remote[i].rank, &orank)); 1119 PetscCall(PetscFindMPIInt(orank, sf->ndranks, sf->ranks, &irank)); 1120 if (irank < 0) { 1121 PetscCall(PetscFindMPIInt(orank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank)); 1122 if (irank >= 0) irank += sf->ndranks; 1123 } 1124 } 1125 PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %d in array", orank); 1126 sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i; 1127 sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index; 1128 rcount[irank]++; 1129 } 1130 PetscCall(PetscFree2(rcount, ranks)); 1131 PetscFunctionReturn(PETSC_SUCCESS); 1132 } 1133 1134 /*@C 1135 PetscSFGetGroups - gets incoming and outgoing process groups 1136 1137 Collective 1138 1139 Input Parameter: 1140 . sf - star forest 1141 1142 Output Parameters: 1143 + incoming - group of origin processes for incoming edges (leaves that reference my roots) 1144 - outgoing - group of destination processes for outgoing edges (roots that I reference) 1145 1146 Level: developer 1147 1148 .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()` 1149 @*/ 1150 PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing) 1151 { 1152 MPI_Group group = MPI_GROUP_NULL; 1153 1154 PetscFunctionBegin; 1155 PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups"); 1156 if (sf->ingroup == MPI_GROUP_NULL) { 1157 PetscInt i; 1158 const PetscInt *indegree; 1159 PetscMPIInt rank, *outranks, *inranks, indegree0; 1160 PetscSFNode *remote; 1161 PetscSF bgcount; 1162 1163 /* Compute the number of incoming ranks */ 1164 PetscCall(PetscMalloc1(sf->nranks, &remote)); 1165 for (i = 0; i < sf->nranks; i++) { 1166 remote[i].rank = sf->ranks[i]; 1167 remote[i].index = 0; 1168 } 1169 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount)); 1170 PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER)); 1171 PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree)); 1172 PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree)); 1173 /* Enumerate the incoming ranks */ 1174 PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks)); 1175 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 1176 for (i = 0; i < sf->nranks; i++) outranks[i] = rank; 1177 PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks)); 1178 PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks)); 1179 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 1180 PetscCall(PetscMPIIntCast(indegree[0], &indegree0)); 1181 PetscCallMPI(MPI_Group_incl(group, indegree0, inranks, &sf->ingroup)); 1182 PetscCallMPI(MPI_Group_free(&group)); 1183 PetscCall(PetscFree2(inranks, outranks)); 1184 PetscCall(PetscSFDestroy(&bgcount)); 1185 } 1186 *incoming = sf->ingroup; 1187 1188 if (sf->outgroup == MPI_GROUP_NULL) { 1189 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 1190 PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup)); 1191 PetscCallMPI(MPI_Group_free(&group)); 1192 } 1193 *outgoing = sf->outgroup; 1194 PetscFunctionReturn(PETSC_SUCCESS); 1195 } 1196 1197 /*@ 1198 PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks 1199 1200 Collective 1201 1202 Input Parameter: 1203 . sf - star forest 1204 1205 Output Parameter: 1206 . rsf - the star forest with a single root per process to perform communications 1207 1208 Level: developer 1209 1210 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()` 1211 @*/ 1212 PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf) 1213 { 1214 PetscFunctionBegin; 1215 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1216 PetscAssertPointer(rsf, 2); 1217 if (!sf->rankssf) { 1218 PetscSFNode *rremotes; 1219 const PetscMPIInt *ranks; 1220 PetscMPIInt nranks; 1221 1222 PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL)); 1223 PetscCall(PetscMalloc1(nranks, &rremotes)); 1224 for (PetscInt i = 0; i < nranks; i++) { 1225 rremotes[i].rank = ranks[i]; 1226 rremotes[i].index = 0; 1227 } 1228 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf)); 1229 PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER)); 1230 } 1231 *rsf = sf->rankssf; 1232 PetscFunctionReturn(PETSC_SUCCESS); 1233 } 1234 1235 /*@ 1236 PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters 1237 1238 Collective 1239 1240 Input Parameter: 1241 . sf - star forest that may contain roots with 0 or with more than 1 vertex 1242 1243 Output Parameter: 1244 . multi - star forest with split roots, such that each root has degree exactly 1 1245 1246 Level: developer 1247 1248 Note: 1249 In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi 1250 directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming 1251 edge, it is a candidate for future optimization that might involve its removal. 1252 1253 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()` 1254 @*/ 1255 PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi) 1256 { 1257 PetscFunctionBegin; 1258 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1259 PetscAssertPointer(multi, 2); 1260 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */ 1261 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi)); 1262 *multi = sf->multi; 1263 sf->multi->multi = sf->multi; 1264 PetscFunctionReturn(PETSC_SUCCESS); 1265 } 1266 if (!sf->multi) { 1267 const PetscInt *indegree; 1268 PetscInt i, *inoffset, *outones, *outoffset, maxlocal; 1269 PetscSFNode *remote; 1270 maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */ 1271 PetscCall(PetscSFComputeDegreeBegin(sf, &indegree)); 1272 PetscCall(PetscSFComputeDegreeEnd(sf, &indegree)); 1273 PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset)); 1274 inoffset[0] = 0; 1275 for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i]; 1276 for (i = 0; i < maxlocal; i++) outones[i] = 1; 1277 PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM)); 1278 PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM)); 1279 for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1280 if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */ 1281 for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp"); 1282 } 1283 PetscCall(PetscMalloc1(sf->nleaves, &remote)); 1284 for (i = 0; i < sf->nleaves; i++) { 1285 remote[i].rank = sf->remote[i].rank; 1286 remote[i].index = outoffset[sf->mine ? sf->mine[i] : i]; 1287 } 1288 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi)); 1289 sf->multi->multi = sf->multi; 1290 PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER)); 1291 if (sf->rankorder) { /* Sort the ranks */ 1292 PetscMPIInt rank; 1293 PetscInt *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree; 1294 PetscSFNode *newremote; 1295 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 1296 for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]); 1297 PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset)); 1298 for (i = 0; i < maxlocal; i++) outranks[i] = rank; 1299 PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE)); 1300 PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE)); 1301 /* Sort the incoming ranks at each vertex, build the inverse map */ 1302 for (i = 0; i < sf->nroots; i++) { 1303 PetscInt j; 1304 for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j; 1305 PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset)); 1306 for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1307 } 1308 PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE)); 1309 PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE)); 1310 PetscCall(PetscMalloc1(sf->nleaves, &newremote)); 1311 for (i = 0; i < sf->nleaves; i++) { 1312 newremote[i].rank = sf->remote[i].rank; 1313 newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i]; 1314 } 1315 PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER)); 1316 PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset)); 1317 } 1318 PetscCall(PetscFree3(inoffset, outones, outoffset)); 1319 } 1320 *multi = sf->multi; 1321 PetscFunctionReturn(PETSC_SUCCESS); 1322 } 1323 1324 /*@C 1325 PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices 1326 1327 Collective 1328 1329 Input Parameters: 1330 + sf - original star forest 1331 . nselected - number of selected roots on this process 1332 - selected - indices of the selected roots on this process 1333 1334 Output Parameter: 1335 . esf - new star forest 1336 1337 Level: advanced 1338 1339 Note: 1340 To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can 1341 be done by calling PetscSFGetGraph(). 1342 1343 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1344 @*/ 1345 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf) 1346 { 1347 PetscInt i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal; 1348 const PetscInt *ilocal; 1349 signed char *rootdata, *leafdata, *leafmem; 1350 const PetscSFNode *iremote; 1351 PetscSFNode *new_iremote; 1352 MPI_Comm comm; 1353 1354 PetscFunctionBegin; 1355 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1356 PetscSFCheckGraphSet(sf, 1); 1357 if (nselected) PetscAssertPointer(selected, 3); 1358 PetscAssertPointer(esf, 4); 1359 1360 PetscCall(PetscSFSetUp(sf)); 1361 PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0)); 1362 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1363 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 1364 1365 if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */ 1366 PetscBool dups; 1367 PetscCall(PetscCheckDupsInt(nselected, selected, &dups)); 1368 PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups"); 1369 for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots); 1370 } 1371 1372 if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf); 1373 else { 1374 /* A generic version of creating embedded sf */ 1375 PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf)); 1376 maxlocal = maxleaf - minleaf + 1; 1377 PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem)); 1378 leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf); 1379 /* Tag selected roots and bcast to leaves */ 1380 for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1; 1381 PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1382 PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1383 1384 /* Build esf with leaves that are still connected */ 1385 esf_nleaves = 0; 1386 for (i = 0; i < nleaves; i++) { 1387 j = ilocal ? ilocal[i] : i; 1388 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1389 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1390 */ 1391 esf_nleaves += (leafdata[j] ? 1 : 0); 1392 } 1393 PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal)); 1394 PetscCall(PetscMalloc1(esf_nleaves, &new_iremote)); 1395 for (i = n = 0; i < nleaves; i++) { 1396 j = ilocal ? ilocal[i] : i; 1397 if (leafdata[j]) { 1398 new_ilocal[n] = j; 1399 new_iremote[n].rank = iremote[i].rank; 1400 new_iremote[n].index = iremote[i].index; 1401 ++n; 1402 } 1403 } 1404 PetscCall(PetscSFCreate(comm, esf)); 1405 PetscCall(PetscSFSetFromOptions(*esf)); 1406 PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1407 PetscCall(PetscFree2(rootdata, leafmem)); 1408 } 1409 PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0)); 1410 PetscFunctionReturn(PETSC_SUCCESS); 1411 } 1412 1413 /*@C 1414 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices 1415 1416 Collective 1417 1418 Input Parameters: 1419 + sf - original star forest 1420 . nselected - number of selected leaves on this process 1421 - selected - indices of the selected leaves on this process 1422 1423 Output Parameter: 1424 . newsf - new star forest 1425 1426 Level: advanced 1427 1428 .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1429 @*/ 1430 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf) 1431 { 1432 const PetscSFNode *iremote; 1433 PetscSFNode *new_iremote; 1434 const PetscInt *ilocal; 1435 PetscInt i, nroots, *leaves, *new_ilocal; 1436 MPI_Comm comm; 1437 1438 PetscFunctionBegin; 1439 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1440 PetscSFCheckGraphSet(sf, 1); 1441 if (nselected) PetscAssertPointer(selected, 3); 1442 PetscAssertPointer(newsf, 4); 1443 1444 /* Uniq selected[] and put results in leaves[] */ 1445 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1446 PetscCall(PetscMalloc1(nselected, &leaves)); 1447 PetscCall(PetscArraycpy(leaves, selected, nselected)); 1448 PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves)); 1449 PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves); 1450 1451 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1452 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf); 1453 else { 1454 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote)); 1455 PetscCall(PetscMalloc1(nselected, &new_ilocal)); 1456 PetscCall(PetscMalloc1(nselected, &new_iremote)); 1457 for (i = 0; i < nselected; ++i) { 1458 const PetscInt l = leaves[i]; 1459 new_ilocal[i] = ilocal ? ilocal[l] : l; 1460 new_iremote[i].rank = iremote[l].rank; 1461 new_iremote[i].index = iremote[l].index; 1462 } 1463 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf)); 1464 PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1465 } 1466 PetscCall(PetscFree(leaves)); 1467 PetscFunctionReturn(PETSC_SUCCESS); 1468 } 1469 1470 /*@C 1471 PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()` 1472 1473 Collective 1474 1475 Input Parameters: 1476 + sf - star forest on which to communicate 1477 . unit - data type associated with each node 1478 . rootdata - buffer to broadcast 1479 - op - operation to use for reduction 1480 1481 Output Parameter: 1482 . leafdata - buffer to be reduced with values from each leaf's respective root 1483 1484 Level: intermediate 1485 1486 Note: 1487 When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1488 are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should 1489 use `PetscSFBcastWithMemTypeBegin()` instead. 1490 1491 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()` 1492 @*/ 1493 PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1494 { 1495 PetscMemType rootmtype, leafmtype; 1496 1497 PetscFunctionBegin; 1498 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1499 PetscCall(PetscSFSetUp(sf)); 1500 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1501 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1502 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1503 PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 1504 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1505 PetscFunctionReturn(PETSC_SUCCESS); 1506 } 1507 1508 /*@C 1509 PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call 1510 to `PetscSFBcastEnd()` 1511 1512 Collective 1513 1514 Input Parameters: 1515 + sf - star forest on which to communicate 1516 . unit - data type associated with each node 1517 . rootmtype - memory type of rootdata 1518 . rootdata - buffer to broadcast 1519 . leafmtype - memory type of leafdata 1520 - op - operation to use for reduction 1521 1522 Output Parameter: 1523 . leafdata - buffer to be reduced with values from each leaf's respective root 1524 1525 Level: intermediate 1526 1527 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()` 1528 @*/ 1529 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) 1530 { 1531 PetscFunctionBegin; 1532 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1533 PetscCall(PetscSFSetUp(sf)); 1534 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1535 PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 1536 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1537 PetscFunctionReturn(PETSC_SUCCESS); 1538 } 1539 1540 /*@C 1541 PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()` 1542 1543 Collective 1544 1545 Input Parameters: 1546 + sf - star forest 1547 . unit - data type 1548 . rootdata - buffer to broadcast 1549 - op - operation to use for reduction 1550 1551 Output Parameter: 1552 . leafdata - buffer to be reduced with values from each leaf's respective root 1553 1554 Level: intermediate 1555 1556 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()` 1557 @*/ 1558 PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1559 { 1560 PetscFunctionBegin; 1561 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1562 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1563 PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op); 1564 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1565 PetscFunctionReturn(PETSC_SUCCESS); 1566 } 1567 1568 /*@C 1569 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()` 1570 1571 Collective 1572 1573 Input Parameters: 1574 + sf - star forest 1575 . unit - data type 1576 . leafdata - values to reduce 1577 - op - reduction operation 1578 1579 Output Parameter: 1580 . rootdata - result of reduction of values from all leaves of each root 1581 1582 Level: intermediate 1583 1584 Note: 1585 When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1586 are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should 1587 use `PetscSFReduceWithMemTypeBegin()` instead. 1588 1589 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()` 1590 @*/ 1591 PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1592 { 1593 PetscMemType rootmtype, leafmtype; 1594 1595 PetscFunctionBegin; 1596 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1597 PetscCall(PetscSFSetUp(sf)); 1598 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1599 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1600 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1601 PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1602 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1603 PetscFunctionReturn(PETSC_SUCCESS); 1604 } 1605 1606 /*@C 1607 PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()` 1608 1609 Collective 1610 1611 Input Parameters: 1612 + sf - star forest 1613 . unit - data type 1614 . leafmtype - memory type of leafdata 1615 . leafdata - values to reduce 1616 . rootmtype - memory type of rootdata 1617 - op - reduction operation 1618 1619 Output Parameter: 1620 . rootdata - result of reduction of values from all leaves of each root 1621 1622 Level: intermediate 1623 1624 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()` 1625 @*/ 1626 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) 1627 { 1628 PetscFunctionBegin; 1629 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1630 PetscCall(PetscSFSetUp(sf)); 1631 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1632 PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1633 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1634 PetscFunctionReturn(PETSC_SUCCESS); 1635 } 1636 1637 /*@C 1638 PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()` 1639 1640 Collective 1641 1642 Input Parameters: 1643 + sf - star forest 1644 . unit - data type 1645 . leafdata - values to reduce 1646 - op - reduction operation 1647 1648 Output Parameter: 1649 . rootdata - result of reduction of values from all leaves of each root 1650 1651 Level: intermediate 1652 1653 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()` 1654 @*/ 1655 PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1656 { 1657 PetscFunctionBegin; 1658 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1659 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1660 PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op); 1661 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1662 PetscFunctionReturn(PETSC_SUCCESS); 1663 } 1664 1665 /*@C 1666 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, 1667 to be completed with `PetscSFFetchAndOpEnd()` 1668 1669 Collective 1670 1671 Input Parameters: 1672 + sf - star forest 1673 . unit - data type 1674 . leafdata - leaf values to use in reduction 1675 - op - operation to use for reduction 1676 1677 Output Parameters: 1678 + rootdata - root values to be updated, input state is seen by first process to perform an update 1679 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1680 1681 Level: advanced 1682 1683 Note: 1684 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1685 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1686 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1687 integers. 1688 1689 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()` 1690 @*/ 1691 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1692 { 1693 PetscMemType rootmtype, leafmtype, leafupdatemtype; 1694 1695 PetscFunctionBegin; 1696 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1697 PetscCall(PetscSFSetUp(sf)); 1698 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1699 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1700 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1701 PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype)); 1702 PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1703 PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 1704 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1705 PetscFunctionReturn(PETSC_SUCCESS); 1706 } 1707 1708 /*@C 1709 PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by 1710 applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()` 1711 1712 Collective 1713 1714 Input Parameters: 1715 + sf - star forest 1716 . unit - data type 1717 . rootmtype - memory type of rootdata 1718 . leafmtype - memory type of leafdata 1719 . leafdata - leaf values to use in reduction 1720 . leafupdatemtype - memory type of leafupdate 1721 - op - operation to use for reduction 1722 1723 Output Parameters: 1724 + rootdata - root values to be updated, input state is seen by first process to perform an update 1725 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1726 1727 Level: advanced 1728 1729 Note: 1730 See `PetscSFFetchAndOpBegin()` for more details. 1731 1732 .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()` 1733 @*/ 1734 PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op) 1735 { 1736 PetscFunctionBegin; 1737 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1738 PetscCall(PetscSFSetUp(sf)); 1739 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1740 PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1741 PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 1742 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1743 PetscFunctionReturn(PETSC_SUCCESS); 1744 } 1745 1746 /*@C 1747 PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()` 1748 to fetch values from roots and update atomically by applying operation using my leaf value 1749 1750 Collective 1751 1752 Input Parameters: 1753 + sf - star forest 1754 . unit - data type 1755 . leafdata - leaf values to use in reduction 1756 - op - operation to use for reduction 1757 1758 Output Parameters: 1759 + rootdata - root values to be updated, input state is seen by first process to perform an update 1760 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1761 1762 Level: advanced 1763 1764 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()` 1765 @*/ 1766 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1767 { 1768 PetscFunctionBegin; 1769 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1770 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0)); 1771 PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op); 1772 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0)); 1773 PetscFunctionReturn(PETSC_SUCCESS); 1774 } 1775 1776 /*@C 1777 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()` 1778 1779 Collective 1780 1781 Input Parameter: 1782 . sf - star forest 1783 1784 Output Parameter: 1785 . degree - degree of each root vertex 1786 1787 Level: advanced 1788 1789 Note: 1790 The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it. 1791 1792 .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()` 1793 @*/ 1794 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt *degree[]) 1795 { 1796 PetscFunctionBegin; 1797 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1798 PetscSFCheckGraphSet(sf, 1); 1799 PetscAssertPointer(degree, 2); 1800 if (!sf->degreeknown) { 1801 PetscInt i, nroots = sf->nroots, maxlocal; 1802 PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1803 maxlocal = sf->maxleaf - sf->minleaf + 1; 1804 PetscCall(PetscMalloc1(nroots, &sf->degree)); 1805 PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1806 for (i = 0; i < nroots; i++) sf->degree[i] = 0; 1807 for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1; 1808 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 1809 } 1810 *degree = NULL; 1811 PetscFunctionReturn(PETSC_SUCCESS); 1812 } 1813 1814 /*@C 1815 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()` 1816 1817 Collective 1818 1819 Input Parameter: 1820 . sf - star forest 1821 1822 Output Parameter: 1823 . degree - degree of each root vertex 1824 1825 Level: developer 1826 1827 Note: 1828 The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it. 1829 1830 .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()` 1831 @*/ 1832 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree) 1833 { 1834 PetscFunctionBegin; 1835 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1836 PetscSFCheckGraphSet(sf, 1); 1837 PetscAssertPointer(degree, 2); 1838 if (!sf->degreeknown) { 1839 PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 1840 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 1841 PetscCall(PetscFree(sf->degreetmp)); 1842 sf->degreeknown = PETSC_TRUE; 1843 } 1844 *degree = sf->degree; 1845 PetscFunctionReturn(PETSC_SUCCESS); 1846 } 1847 1848 /*@C 1849 PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`). 1850 Each multi-root is assigned index of the corresponding original root. 1851 1852 Collective 1853 1854 Input Parameters: 1855 + sf - star forest 1856 - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()` 1857 1858 Output Parameters: 1859 + nMultiRoots - (optional) number of multi-roots (roots of multi-`PetscSF`) 1860 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots` 1861 1862 Level: developer 1863 1864 Note: 1865 The returned array `multiRootsOrigNumbering` is newly allocated and should be destroyed with `PetscFree()` when no longer needed. 1866 1867 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()` 1868 @*/ 1869 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1870 { 1871 PetscSF msf; 1872 PetscInt k = 0, nroots, nmroots; 1873 1874 PetscFunctionBegin; 1875 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1876 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL)); 1877 if (nroots) PetscAssertPointer(degree, 2); 1878 if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3); 1879 PetscAssertPointer(multiRootsOrigNumbering, 4); 1880 PetscCall(PetscSFGetMultiSF(sf, &msf)); 1881 PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL)); 1882 PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering)); 1883 for (PetscInt i = 0; i < nroots; i++) { 1884 if (!degree[i]) continue; 1885 for (PetscInt j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i; 1886 } 1887 PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail"); 1888 if (nMultiRoots) *nMultiRoots = nmroots; 1889 PetscFunctionReturn(PETSC_SUCCESS); 1890 } 1891 1892 /*@C 1893 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()` 1894 1895 Collective 1896 1897 Input Parameters: 1898 + sf - star forest 1899 . unit - data type 1900 - leafdata - leaf data to gather to roots 1901 1902 Output Parameter: 1903 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1904 1905 Level: intermediate 1906 1907 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()` 1908 @*/ 1909 PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) 1910 { 1911 PetscSF multi = NULL; 1912 1913 PetscFunctionBegin; 1914 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1915 PetscCall(PetscSFSetUp(sf)); 1916 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1917 PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 1918 PetscFunctionReturn(PETSC_SUCCESS); 1919 } 1920 1921 /*@C 1922 PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()` 1923 1924 Collective 1925 1926 Input Parameters: 1927 + sf - star forest 1928 . unit - data type 1929 - leafdata - leaf data to gather to roots 1930 1931 Output Parameter: 1932 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1933 1934 Level: intermediate 1935 1936 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()` 1937 @*/ 1938 PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) 1939 { 1940 PetscSF multi = NULL; 1941 1942 PetscFunctionBegin; 1943 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1944 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1945 PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 1946 PetscFunctionReturn(PETSC_SUCCESS); 1947 } 1948 1949 /*@C 1950 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()` 1951 1952 Collective 1953 1954 Input Parameters: 1955 + sf - star forest 1956 . unit - data type 1957 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1958 1959 Output Parameter: 1960 . leafdata - leaf data to be update with personal data from each respective root 1961 1962 Level: intermediate 1963 1964 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()` 1965 @*/ 1966 PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) 1967 { 1968 PetscSF multi = NULL; 1969 1970 PetscFunctionBegin; 1971 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1972 PetscCall(PetscSFSetUp(sf)); 1973 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1974 PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE)); 1975 PetscFunctionReturn(PETSC_SUCCESS); 1976 } 1977 1978 /*@C 1979 PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()` 1980 1981 Collective 1982 1983 Input Parameters: 1984 + sf - star forest 1985 . unit - data type 1986 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1987 1988 Output Parameter: 1989 . leafdata - leaf data to be update with personal data from each respective root 1990 1991 Level: intermediate 1992 1993 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()` 1994 @*/ 1995 PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) 1996 { 1997 PetscSF multi = NULL; 1998 1999 PetscFunctionBegin; 2000 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2001 PetscCall(PetscSFGetMultiSF(sf, &multi)); 2002 PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE)); 2003 PetscFunctionReturn(PETSC_SUCCESS); 2004 } 2005 2006 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 2007 { 2008 PetscInt i, n, nleaves; 2009 const PetscInt *ilocal = NULL; 2010 PetscHSetI seen; 2011 2012 PetscFunctionBegin; 2013 if (PetscDefined(USE_DEBUG)) { 2014 PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL)); 2015 PetscCall(PetscHSetICreate(&seen)); 2016 for (i = 0; i < nleaves; i++) { 2017 const PetscInt leaf = ilocal ? ilocal[i] : i; 2018 PetscCall(PetscHSetIAdd(seen, leaf)); 2019 } 2020 PetscCall(PetscHSetIGetSize(seen, &n)); 2021 PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique"); 2022 PetscCall(PetscHSetIDestroy(&seen)); 2023 } 2024 PetscFunctionReturn(PETSC_SUCCESS); 2025 } 2026 2027 /*@ 2028 PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view 2029 2030 Input Parameters: 2031 + sfA - The first `PetscSF` 2032 - sfB - The second `PetscSF` 2033 2034 Output Parameter: 2035 . sfBA - The composite `PetscSF` 2036 2037 Level: developer 2038 2039 Notes: 2040 Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star 2041 forests, i.e. the same leaf is not connected with different roots. 2042 2043 `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds 2044 a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected 2045 nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a 2046 `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfB`, on connected nodes. 2047 2048 .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()` 2049 @*/ 2050 PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA) 2051 { 2052 const PetscSFNode *remotePointsA, *remotePointsB; 2053 PetscSFNode *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB; 2054 const PetscInt *localPointsA, *localPointsB; 2055 PetscInt *localPointsBA; 2056 PetscInt i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA; 2057 PetscBool denseB; 2058 2059 PetscFunctionBegin; 2060 PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1); 2061 PetscSFCheckGraphSet(sfA, 1); 2062 PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2); 2063 PetscSFCheckGraphSet(sfB, 2); 2064 PetscCheckSameComm(sfA, 1, sfB, 2); 2065 PetscAssertPointer(sfBA, 3); 2066 PetscCall(PetscSFCheckLeavesUnique_Private(sfA)); 2067 PetscCall(PetscSFCheckLeavesUnique_Private(sfB)); 2068 2069 PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA)); 2070 PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB)); 2071 /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size 2072 numRootsB; otherwise, garbage will be broadcasted. 2073 Example (comm size = 1): 2074 sfA: 0 <- (0, 0) 2075 sfB: 100 <- (0, 0) 2076 101 <- (0, 1) 2077 Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid tartget 2078 of sfB, it has to be recasted as [(0, 0), (-1, -1)] so that points 100 and 101 would 2079 receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on 2080 remotePointsA; if not recasted, point 101 would receive a garbage value. */ 2081 PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA)); 2082 for (i = 0; i < numRootsB; i++) { 2083 reorderedRemotePointsA[i].rank = -1; 2084 reorderedRemotePointsA[i].index = -1; 2085 } 2086 for (i = 0; i < numLeavesA; i++) { 2087 PetscInt localp = localPointsA ? localPointsA[i] : i; 2088 2089 if (localp >= numRootsB) continue; 2090 reorderedRemotePointsA[localp] = remotePointsA[i]; 2091 } 2092 remotePointsA = reorderedRemotePointsA; 2093 PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf)); 2094 PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB)); 2095 for (i = 0; i < maxleaf - minleaf + 1; i++) { 2096 leafdataB[i].rank = -1; 2097 leafdataB[i].index = -1; 2098 } 2099 PetscCall(PetscSFBcastBegin(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE)); 2100 PetscCall(PetscSFBcastEnd(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE)); 2101 PetscCall(PetscFree(reorderedRemotePointsA)); 2102 2103 denseB = (PetscBool)!localPointsB; 2104 for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) { 2105 if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE; 2106 else numLeavesBA++; 2107 } 2108 if (denseB) { 2109 localPointsBA = NULL; 2110 remotePointsBA = leafdataB; 2111 } else { 2112 PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA)); 2113 PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA)); 2114 for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) { 2115 const PetscInt l = localPointsB ? localPointsB[i] : i; 2116 2117 if (leafdataB[l - minleaf].rank == -1) continue; 2118 remotePointsBA[numLeavesBA] = leafdataB[l - minleaf]; 2119 localPointsBA[numLeavesBA] = l; 2120 numLeavesBA++; 2121 } 2122 PetscCall(PetscFree(leafdataB)); 2123 } 2124 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA)); 2125 PetscCall(PetscSFSetFromOptions(*sfBA)); 2126 PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER)); 2127 PetscFunctionReturn(PETSC_SUCCESS); 2128 } 2129 2130 /*@ 2131 PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one 2132 2133 Input Parameters: 2134 + sfA - The first `PetscSF` 2135 - sfB - The second `PetscSF` 2136 2137 Output Parameter: 2138 . sfBA - The composite `PetscSF`. 2139 2140 Level: developer 2141 2142 Notes: 2143 Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star 2144 forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the 2145 second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected. 2146 2147 `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds 2148 a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected 2149 roots are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` 2150 on `sfA`, then 2151 a `PetscSFReduceBegin()`/`PetscSFReduceEnd()` on `sfB`, on connected roots. 2152 2153 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()` 2154 @*/ 2155 PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA) 2156 { 2157 const PetscSFNode *remotePointsA, *remotePointsB; 2158 PetscSFNode *remotePointsBA; 2159 const PetscInt *localPointsA, *localPointsB; 2160 PetscSFNode *reorderedRemotePointsA = NULL; 2161 PetscInt i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA; 2162 MPI_Op op; 2163 #if defined(PETSC_USE_64BIT_INDICES) 2164 PetscBool iswin; 2165 #endif 2166 2167 PetscFunctionBegin; 2168 PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1); 2169 PetscSFCheckGraphSet(sfA, 1); 2170 PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2); 2171 PetscSFCheckGraphSet(sfB, 2); 2172 PetscCheckSameComm(sfA, 1, sfB, 2); 2173 PetscAssertPointer(sfBA, 3); 2174 PetscCall(PetscSFCheckLeavesUnique_Private(sfA)); 2175 PetscCall(PetscSFCheckLeavesUnique_Private(sfB)); 2176 2177 PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA)); 2178 PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB)); 2179 2180 /* TODO: Check roots of sfB have degree of 1 */ 2181 /* Once we implement it, we can replace the MPI_MAXLOC 2182 with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect. 2183 We use MPI_MAXLOC only to have a deterministic output from this routine if 2184 the root condition is not meet. 2185 */ 2186 op = MPI_MAXLOC; 2187 #if defined(PETSC_USE_64BIT_INDICES) 2188 /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */ 2189 PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin)); 2190 if (iswin) op = MPI_REPLACE; 2191 #endif 2192 2193 PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf)); 2194 PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA)); 2195 for (i = 0; i < maxleaf - minleaf + 1; i++) { 2196 reorderedRemotePointsA[i].rank = -1; 2197 reorderedRemotePointsA[i].index = -1; 2198 } 2199 if (localPointsA) { 2200 for (i = 0; i < numLeavesA; i++) { 2201 if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; 2202 reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i]; 2203 } 2204 } else { 2205 for (i = 0; i < numLeavesA; i++) { 2206 if (i > maxleaf || i < minleaf) continue; 2207 reorderedRemotePointsA[i - minleaf] = remotePointsA[i]; 2208 } 2209 } 2210 2211 PetscCall(PetscMalloc1(numRootsB, &localPointsBA)); 2212 PetscCall(PetscMalloc1(numRootsB, &remotePointsBA)); 2213 for (i = 0; i < numRootsB; i++) { 2214 remotePointsBA[i].rank = -1; 2215 remotePointsBA[i].index = -1; 2216 } 2217 2218 PetscCall(PetscSFReduceBegin(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op)); 2219 PetscCall(PetscSFReduceEnd(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op)); 2220 PetscCall(PetscFree(reorderedRemotePointsA)); 2221 for (i = 0, numLeavesBA = 0; i < numRootsB; i++) { 2222 if (remotePointsBA[i].rank == -1) continue; 2223 remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank; 2224 remotePointsBA[numLeavesBA].index = remotePointsBA[i].index; 2225 localPointsBA[numLeavesBA] = i; 2226 numLeavesBA++; 2227 } 2228 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA)); 2229 PetscCall(PetscSFSetFromOptions(*sfBA)); 2230 PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER)); 2231 PetscFunctionReturn(PETSC_SUCCESS); 2232 } 2233 2234 /* 2235 PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF` 2236 2237 Input Parameter: 2238 . sf - The global `PetscSF` 2239 2240 Output Parameter: 2241 . out - The local `PetscSF` 2242 2243 .seealso: `PetscSF`, `PetscSFCreate()` 2244 */ 2245 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out) 2246 { 2247 MPI_Comm comm; 2248 PetscMPIInt myrank; 2249 const PetscInt *ilocal; 2250 const PetscSFNode *iremote; 2251 PetscInt i, j, nroots, nleaves, lnleaves, *lilocal; 2252 PetscSFNode *liremote; 2253 PetscSF lsf; 2254 2255 PetscFunctionBegin; 2256 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2257 if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out); 2258 else { 2259 PetscMPIInt irank; 2260 2261 /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */ 2262 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 2263 PetscCallMPI(MPI_Comm_rank(comm, &myrank)); 2264 2265 /* Find out local edges and build a local SF */ 2266 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 2267 for (i = lnleaves = 0; i < nleaves; i++) { 2268 PetscCall(PetscMPIIntCast(iremote[i].rank, &irank)); 2269 if (irank == myrank) lnleaves++; 2270 } 2271 PetscCall(PetscMalloc1(lnleaves, &lilocal)); 2272 PetscCall(PetscMalloc1(lnleaves, &liremote)); 2273 2274 for (i = j = 0; i < nleaves; i++) { 2275 PetscCall(PetscMPIIntCast(iremote[i].rank, &irank)); 2276 if (irank == myrank) { 2277 lilocal[j] = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */ 2278 liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */ 2279 liremote[j].index = iremote[i].index; 2280 j++; 2281 } 2282 } 2283 PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf)); 2284 PetscCall(PetscSFSetFromOptions(lsf)); 2285 PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER)); 2286 PetscCall(PetscSFSetUp(lsf)); 2287 *out = lsf; 2288 } 2289 PetscFunctionReturn(PETSC_SUCCESS); 2290 } 2291 2292 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */ 2293 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata) 2294 { 2295 PetscMemType rootmtype, leafmtype; 2296 2297 PetscFunctionBegin; 2298 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2299 PetscCall(PetscSFSetUp(sf)); 2300 PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 2301 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 2302 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 2303 PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata); 2304 PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 2305 PetscFunctionReturn(PETSC_SUCCESS); 2306 } 2307 2308 /*@ 2309 PetscSFConcatenate - concatenate multiple `PetscSF` into one 2310 2311 Input Parameters: 2312 + comm - the communicator 2313 . nsfs - the number of input `PetscSF` 2314 . sfs - the array of input `PetscSF` 2315 . rootMode - the root mode specifying how roots are handled 2316 - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage 2317 2318 Output Parameter: 2319 . newsf - The resulting `PetscSF` 2320 2321 Level: advanced 2322 2323 Notes: 2324 The communicator of all `PetscSF`s in `sfs` must be comm. 2325 2326 Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order. 2327 2328 The offsets in `leafOffsets` are added to the original leaf indices. 2329 2330 If all input SFs use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well. 2331 In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`. 2332 2333 If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s. 2334 In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs). 2335 2336 All root modes retain the essential connectivity condition. 2337 If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`. 2338 Parameter `rootMode` controls how the input root spaces are combined. 2339 For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode) 2340 and is also the same in the output `PetscSF`. 2341 For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined. 2342 `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally; 2343 roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input `PetscSF` and original local index, and renumbered contiguously. 2344 `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally; 2345 roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously; 2346 the original root ranks are ignored. 2347 For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, 2348 the output `PetscSF`'s root layout is such that the local number of roots is a sum of the input `PetscSF`'s local numbers of roots on each rank 2349 to keep the load balancing. 2350 However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different ranks. 2351 2352 Example: 2353 We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running 2354 .vb 2355 make -C $PETSC_DIR/src/vec/is/sf/tests ex18 2356 for m in {local,global,shared}; do 2357 mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view 2358 done 2359 .ve 2360 we generate two identical `PetscSF`s sf_0 and sf_1, 2361 .vb 2362 PetscSF Object: sf_0 2 MPI processes 2363 type: basic 2364 rank #leaves #roots 2365 [ 0] 4 2 2366 [ 1] 4 2 2367 leaves roots roots in global numbering 2368 ( 0, 0) <- ( 0, 0) = 0 2369 ( 0, 1) <- ( 0, 1) = 1 2370 ( 0, 2) <- ( 1, 0) = 2 2371 ( 0, 3) <- ( 1, 1) = 3 2372 ( 1, 0) <- ( 0, 0) = 0 2373 ( 1, 1) <- ( 0, 1) = 1 2374 ( 1, 2) <- ( 1, 0) = 2 2375 ( 1, 3) <- ( 1, 1) = 3 2376 .ve 2377 and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\: 2378 .vb 2379 rootMode = local: 2380 PetscSF Object: result_sf 2 MPI processes 2381 type: basic 2382 rank #leaves #roots 2383 [ 0] 8 4 2384 [ 1] 8 4 2385 leaves roots roots in global numbering 2386 ( 0, 0) <- ( 0, 0) = 0 2387 ( 0, 1) <- ( 0, 1) = 1 2388 ( 0, 2) <- ( 1, 0) = 4 2389 ( 0, 3) <- ( 1, 1) = 5 2390 ( 0, 4) <- ( 0, 2) = 2 2391 ( 0, 5) <- ( 0, 3) = 3 2392 ( 0, 6) <- ( 1, 2) = 6 2393 ( 0, 7) <- ( 1, 3) = 7 2394 ( 1, 0) <- ( 0, 0) = 0 2395 ( 1, 1) <- ( 0, 1) = 1 2396 ( 1, 2) <- ( 1, 0) = 4 2397 ( 1, 3) <- ( 1, 1) = 5 2398 ( 1, 4) <- ( 0, 2) = 2 2399 ( 1, 5) <- ( 0, 3) = 3 2400 ( 1, 6) <- ( 1, 2) = 6 2401 ( 1, 7) <- ( 1, 3) = 7 2402 2403 rootMode = global: 2404 PetscSF Object: result_sf 2 MPI processes 2405 type: basic 2406 rank #leaves #roots 2407 [ 0] 8 4 2408 [ 1] 8 4 2409 leaves roots roots in global numbering 2410 ( 0, 0) <- ( 0, 0) = 0 2411 ( 0, 1) <- ( 0, 1) = 1 2412 ( 0, 2) <- ( 0, 2) = 2 2413 ( 0, 3) <- ( 0, 3) = 3 2414 ( 0, 4) <- ( 1, 0) = 4 2415 ( 0, 5) <- ( 1, 1) = 5 2416 ( 0, 6) <- ( 1, 2) = 6 2417 ( 0, 7) <- ( 1, 3) = 7 2418 ( 1, 0) <- ( 0, 0) = 0 2419 ( 1, 1) <- ( 0, 1) = 1 2420 ( 1, 2) <- ( 0, 2) = 2 2421 ( 1, 3) <- ( 0, 3) = 3 2422 ( 1, 4) <- ( 1, 0) = 4 2423 ( 1, 5) <- ( 1, 1) = 5 2424 ( 1, 6) <- ( 1, 2) = 6 2425 ( 1, 7) <- ( 1, 3) = 7 2426 2427 rootMode = shared: 2428 PetscSF Object: result_sf 2 MPI processes 2429 type: basic 2430 rank #leaves #roots 2431 [ 0] 8 2 2432 [ 1] 8 2 2433 leaves roots roots in global numbering 2434 ( 0, 0) <- ( 0, 0) = 0 2435 ( 0, 1) <- ( 0, 1) = 1 2436 ( 0, 2) <- ( 1, 0) = 2 2437 ( 0, 3) <- ( 1, 1) = 3 2438 ( 0, 4) <- ( 0, 0) = 0 2439 ( 0, 5) <- ( 0, 1) = 1 2440 ( 0, 6) <- ( 1, 0) = 2 2441 ( 0, 7) <- ( 1, 1) = 3 2442 ( 1, 0) <- ( 0, 0) = 0 2443 ( 1, 1) <- ( 0, 1) = 1 2444 ( 1, 2) <- ( 1, 0) = 2 2445 ( 1, 3) <- ( 1, 1) = 3 2446 ( 1, 4) <- ( 0, 0) = 0 2447 ( 1, 5) <- ( 0, 1) = 1 2448 ( 1, 6) <- ( 1, 0) = 2 2449 ( 1, 7) <- ( 1, 1) = 3 2450 .ve 2451 2452 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode` 2453 @*/ 2454 PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf) 2455 { 2456 PetscInt i, s, nLeaves, nRoots; 2457 PetscInt *leafArrayOffsets; 2458 PetscInt *ilocal_new; 2459 PetscSFNode *iremote_new; 2460 PetscBool all_ilocal_null = PETSC_FALSE; 2461 PetscLayout glayout = NULL; 2462 PetscInt *gremote = NULL; 2463 PetscMPIInt rank, size; 2464 2465 PetscFunctionBegin; 2466 if (PetscDefined(USE_DEBUG)) { 2467 PetscSF dummy; /* just to have a PetscObject on comm for input validation */ 2468 2469 PetscCall(PetscSFCreate(comm, &dummy)); 2470 PetscValidLogicalCollectiveInt(dummy, nsfs, 2); 2471 PetscAssertPointer(sfs, 3); 2472 for (i = 0; i < nsfs; i++) { 2473 PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3); 2474 PetscCheckSameComm(dummy, 1, sfs[i], 3); 2475 } 2476 PetscValidLogicalCollectiveEnum(dummy, rootMode, 4); 2477 if (leafOffsets) PetscAssertPointer(leafOffsets, 5); 2478 PetscAssertPointer(newsf, 6); 2479 PetscCall(PetscSFDestroy(&dummy)); 2480 } 2481 if (!nsfs) { 2482 PetscCall(PetscSFCreate(comm, newsf)); 2483 PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 2484 PetscFunctionReturn(PETSC_SUCCESS); 2485 } 2486 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2487 PetscCallMPI(MPI_Comm_size(comm, &size)); 2488 2489 /* Calculate leaf array offsets */ 2490 PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets)); 2491 leafArrayOffsets[0] = 0; 2492 for (s = 0; s < nsfs; s++) { 2493 PetscInt nl; 2494 2495 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL)); 2496 leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl; 2497 } 2498 nLeaves = leafArrayOffsets[nsfs]; 2499 2500 /* Calculate number of roots */ 2501 switch (rootMode) { 2502 case PETSCSF_CONCATENATE_ROOTMODE_SHARED: { 2503 PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL)); 2504 if (PetscDefined(USE_DEBUG)) { 2505 for (s = 1; s < nsfs; s++) { 2506 PetscInt nr; 2507 2508 PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL)); 2509 PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots); 2510 } 2511 } 2512 } break; 2513 case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: { 2514 /* Calculate also global layout in this case */ 2515 PetscInt *nls; 2516 PetscLayout *lts; 2517 PetscInt **inds; 2518 PetscInt j; 2519 PetscInt rootOffset = 0; 2520 2521 PetscCall(PetscCalloc3(nsfs, <s, nsfs, &nls, nsfs, &inds)); 2522 PetscCall(PetscLayoutCreate(comm, &glayout)); 2523 glayout->bs = 1; 2524 glayout->n = 0; 2525 glayout->N = 0; 2526 for (s = 0; s < nsfs; s++) { 2527 PetscCall(PetscSFGetGraphLayout(sfs[s], <s[s], &nls[s], NULL, &inds[s])); 2528 glayout->n += lts[s]->n; 2529 glayout->N += lts[s]->N; 2530 } 2531 PetscCall(PetscLayoutSetUp(glayout)); 2532 PetscCall(PetscMalloc1(nLeaves, &gremote)); 2533 for (s = 0, j = 0; s < nsfs; s++) { 2534 for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset; 2535 rootOffset += lts[s]->N; 2536 PetscCall(PetscLayoutDestroy(<s[s])); 2537 PetscCall(PetscFree(inds[s])); 2538 } 2539 PetscCall(PetscFree3(lts, nls, inds)); 2540 nRoots = glayout->N; 2541 } break; 2542 case PETSCSF_CONCATENATE_ROOTMODE_LOCAL: 2543 /* nRoots calculated later in this case */ 2544 break; 2545 default: 2546 SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode); 2547 } 2548 2549 if (!leafOffsets) { 2550 all_ilocal_null = PETSC_TRUE; 2551 for (s = 0; s < nsfs; s++) { 2552 const PetscInt *ilocal; 2553 2554 PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL)); 2555 if (ilocal) { 2556 all_ilocal_null = PETSC_FALSE; 2557 break; 2558 } 2559 } 2560 PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL"); 2561 } 2562 2563 /* Renumber and concatenate local leaves */ 2564 ilocal_new = NULL; 2565 if (!all_ilocal_null) { 2566 PetscCall(PetscMalloc1(nLeaves, &ilocal_new)); 2567 for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1; 2568 for (s = 0; s < nsfs; s++) { 2569 const PetscInt *ilocal; 2570 PetscInt *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]); 2571 PetscInt i, nleaves_l; 2572 2573 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL)); 2574 for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s]; 2575 } 2576 } 2577 2578 /* Renumber and concatenate remote roots */ 2579 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) { 2580 PetscInt rootOffset = 0; 2581 2582 PetscCall(PetscMalloc1(nLeaves, &iremote_new)); 2583 for (i = 0; i < nLeaves; i++) { 2584 iremote_new[i].rank = -1; 2585 iremote_new[i].index = -1; 2586 } 2587 for (s = 0; s < nsfs; s++) { 2588 PetscInt i, nl, nr; 2589 PetscSF tmp_sf; 2590 const PetscSFNode *iremote; 2591 PetscSFNode *tmp_rootdata; 2592 PetscSFNode *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]); 2593 2594 PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote)); 2595 PetscCall(PetscSFCreate(comm, &tmp_sf)); 2596 /* create helper SF with contiguous leaves */ 2597 PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 2598 PetscCall(PetscSFSetUp(tmp_sf)); 2599 PetscCall(PetscMalloc1(nr, &tmp_rootdata)); 2600 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) { 2601 for (i = 0; i < nr; i++) { 2602 tmp_rootdata[i].index = i + rootOffset; 2603 tmp_rootdata[i].rank = rank; 2604 } 2605 rootOffset += nr; 2606 } else { 2607 for (i = 0; i < nr; i++) { 2608 tmp_rootdata[i].index = i; 2609 tmp_rootdata[i].rank = rank; 2610 } 2611 } 2612 PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2613 PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2614 PetscCall(PetscSFDestroy(&tmp_sf)); 2615 PetscCall(PetscFree(tmp_rootdata)); 2616 } 2617 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above 2618 2619 /* Build the new SF */ 2620 PetscCall(PetscSFCreate(comm, newsf)); 2621 PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER)); 2622 } else { 2623 /* Build the new SF */ 2624 PetscCall(PetscSFCreate(comm, newsf)); 2625 PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote)); 2626 } 2627 PetscCall(PetscSFSetUp(*newsf)); 2628 PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view")); 2629 PetscCall(PetscLayoutDestroy(&glayout)); 2630 PetscCall(PetscFree(gremote)); 2631 PetscCall(PetscFree(leafArrayOffsets)); 2632 PetscFunctionReturn(PETSC_SUCCESS); 2633 } 2634 2635 /*@ 2636 PetscSFRegisterPersistent - Register root and leaf data as memory regions that will be used for repeated PetscSF communications. 2637 2638 Collective 2639 2640 Input Parameters: 2641 + sf - star forest 2642 . unit - the data type contained within the root and leaf data 2643 . rootdata - root data that will be used for multiple PetscSF communications 2644 - leafdata - leaf data that will be used for multiple PetscSF communications 2645 2646 Level: advanced 2647 2648 Notes: 2649 Implementations of `PetscSF` can make optimizations 2650 for repeated communication using the same memory regions, but these optimizations 2651 can be unsound if `rootdata` or `leafdata` is deallocated and the `PetscSF` is not informed. 2652 The intended pattern is 2653 2654 .vb 2655 PetscMalloc2(nroots, &rootdata, nleaves, &leafdata); 2656 2657 PetscSFRegisterPersistent(sf, unit, rootdata, leafdata); 2658 // repeated use of rootdata and leafdata will now be optimized 2659 2660 PetscSFBcastBegin(sf, unit, rootdata, leafdata, MPI_REPLACE); 2661 PetscSFBcastEnd(sf, unit, rootdata, leafdata, MPI_REPLACE); 2662 // ... 2663 PetscSFReduceBegin(sf, unit, leafdata, rootdata, MPI_SUM); 2664 PetscSFReduceEnd(sf, unit, leafdata, rootdata, MPI_SUM); 2665 // ... (other communications) 2666 2667 // rootdata and leafdata must be deregistered before freeing 2668 // skipping this can lead to undefined behavior including 2669 // deadlocks 2670 PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata); 2671 2672 // it is now safe to free rootdata and leafdata 2673 PetscFree2(rootdata, leafdata); 2674 .ve 2675 2676 If you do not register `rootdata` and `leafdata` it will not cause an error, 2677 but optimizations that reduce the setup time for each communication cannot be 2678 made. Currently, the only implementation of `PetscSF` that benefits from 2679 `PetscSFRegisterPersistent()` is `PETSCSFWINDOW`. For the default 2680 `PETSCSFBASIC` there is no benefit to using `PetscSFRegisterPersistent()`. 2681 2682 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFDeregisterPersistent()` 2683 @*/ 2684 PetscErrorCode PetscSFRegisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata) 2685 { 2686 PetscFunctionBegin; 2687 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2688 PetscTryMethod(sf, "PetscSFRegisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata)); 2689 PetscFunctionReturn(PETSC_SUCCESS); 2690 } 2691 2692 /*@ 2693 PetscSFDeregisterPersistent - Signal that repeated usage of root and leaf data for PetscSF communication has concluded. 2694 2695 Collective 2696 2697 Input Parameters: 2698 + sf - star forest 2699 . unit - the data type contained within the root and leaf data 2700 . rootdata - root data that was previously registered with `PetscSFRegisterPersistent()` 2701 - leafdata - leaf data that was previously registered with `PetscSFRegisterPersistent()` 2702 2703 Level: advanced 2704 2705 Note: 2706 See `PetscSFRegisterPersistent()` for when/how to use this function. 2707 2708 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFRegisterPersistent()` 2709 @*/ 2710 PetscErrorCode PetscSFDeregisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata) 2711 { 2712 PetscFunctionBegin; 2713 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2714 PetscTryMethod(sf, "PetscSFDeregisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata)); 2715 PetscFunctionReturn(PETSC_SUCCESS); 2716 } 2717 2718 PETSC_INTERN PetscErrorCode PetscSFGetDatatypeSize_Internal(MPI_Comm comm, MPI_Datatype unit, MPI_Aint *size) 2719 { 2720 MPI_Aint lb, lb_true, bytes, bytes_true; 2721 2722 PetscFunctionBegin; 2723 PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes)); 2724 PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true)); 2725 PetscCheck(lb == 0 && lb_true == 0, comm, PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature"); 2726 *size = bytes; 2727 PetscFunctionReturn(PETSC_SUCCESS); 2728 } 2729