1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/hashseti.h> 3 #include <petsc/private/viewerimpl.h> 4 #include <petsc/private/hashmapi.h> 5 6 #if defined(PETSC_HAVE_CUDA) 7 #include <cuda_runtime.h> 8 #include <petscdevice_cuda.h> 9 #endif 10 11 #if defined(PETSC_HAVE_HIP) 12 #include <hip/hip_runtime.h> 13 #endif 14 15 #if defined(PETSC_CLANG_STATIC_ANALYZER) 16 extern void PetscSFCheckGraphSet(PetscSF, int); 17 #else 18 #if defined(PETSC_USE_DEBUG) 19 #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME) 20 #else 21 #define PetscSFCheckGraphSet(sf, arg) \ 22 do { \ 23 } while (0) 24 #endif 25 #endif 26 27 const char *const PetscSFDuplicateOptions[] = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL}; 28 const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL}; 29 30 /*@ 31 PetscSFCreate - create a star forest communication context 32 33 Collective 34 35 Input Parameter: 36 . comm - communicator on which the star forest will operate 37 38 Output Parameter: 39 . sf - new star forest context 40 41 Options Database Key: 42 + -sf_type basic - Use MPI persistent Isend/Irecv for communication (Default) 43 . -sf_type window - Use MPI-3 one-sided window for communication 44 . -sf_type neighbor - Use MPI-3 neighborhood collectives for communication 45 - -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor) 46 47 Level: intermediate 48 49 Note: 50 When one knows the communication graph is one of the predefined graph, such as `MPI_Alltoall()`, `MPI_Allgatherv()`, 51 `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special 52 `SF`s are optimized and they have better performance than the general `SF`s. 53 54 .seealso: `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()` 55 @*/ 56 PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf) 57 { 58 PetscSF b; 59 60 PetscFunctionBegin; 61 PetscAssertPointer(sf, 2); 62 PetscCall(PetscSFInitializePackage()); 63 64 PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView)); 65 b->nroots = -1; 66 b->nleaves = -1; 67 b->minleaf = PETSC_INT_MAX; 68 b->maxleaf = PETSC_INT_MIN; 69 b->nranks = -1; 70 b->rankorder = PETSC_TRUE; 71 b->ingroup = MPI_GROUP_NULL; 72 b->outgroup = MPI_GROUP_NULL; 73 b->graphset = PETSC_FALSE; 74 #if defined(PETSC_HAVE_DEVICE) 75 b->use_gpu_aware_mpi = use_gpu_aware_mpi; 76 b->use_stream_aware_mpi = PETSC_FALSE; 77 b->unknown_input_stream = PETSC_FALSE; 78 #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 79 b->backend = PETSCSF_BACKEND_KOKKOS; 80 #elif defined(PETSC_HAVE_CUDA) 81 b->backend = PETSCSF_BACKEND_CUDA; 82 #elif defined(PETSC_HAVE_HIP) 83 b->backend = PETSCSF_BACKEND_HIP; 84 #endif 85 86 #if defined(PETSC_HAVE_NVSHMEM) 87 b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */ 88 b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */ 89 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL)); 90 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL)); 91 #endif 92 #endif 93 b->vscat.from_n = -1; 94 b->vscat.to_n = -1; 95 b->vscat.unit = MPIU_SCALAR; 96 *sf = b; 97 PetscFunctionReturn(PETSC_SUCCESS); 98 } 99 100 /*@ 101 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 102 103 Collective 104 105 Input Parameter: 106 . sf - star forest 107 108 Level: advanced 109 110 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()` 111 @*/ 112 PetscErrorCode PetscSFReset(PetscSF sf) 113 { 114 PetscFunctionBegin; 115 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 116 PetscTryTypeMethod(sf, Reset); 117 PetscCall(PetscSFDestroy(&sf->rankssf)); 118 119 sf->nroots = -1; 120 sf->nleaves = -1; 121 sf->minleaf = PETSC_INT_MAX; 122 sf->maxleaf = PETSC_INT_MIN; 123 sf->mine = NULL; 124 sf->remote = NULL; 125 sf->graphset = PETSC_FALSE; 126 PetscCall(PetscFree(sf->mine_alloc)); 127 PetscCall(PetscFree(sf->remote_alloc)); 128 sf->nranks = -1; 129 PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote)); 130 sf->degreeknown = PETSC_FALSE; 131 PetscCall(PetscFree(sf->degree)); 132 if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup)); 133 if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup)); 134 135 if (sf->multi) sf->multi->multi = NULL; 136 PetscCall(PetscSFDestroy(&sf->multi)); 137 138 PetscCall(PetscLayoutDestroy(&sf->map)); 139 140 #if defined(PETSC_HAVE_DEVICE) 141 for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i])); 142 #endif 143 144 sf->setupcalled = PETSC_FALSE; 145 PetscFunctionReturn(PETSC_SUCCESS); 146 } 147 148 /*@ 149 PetscSFSetType - Set the `PetscSF` communication implementation 150 151 Collective 152 153 Input Parameters: 154 + sf - the `PetscSF` context 155 - type - a known method 156 .vb 157 PETSCSFWINDOW - MPI-2/3 one-sided 158 PETSCSFBASIC - basic implementation using MPI-1 two-sided 159 .ve 160 161 Options Database Key: 162 . -sf_type <type> - Sets the method; for example `basic` or `window` use -help for a list of available methods 163 164 Level: intermediate 165 166 Notes: 167 See `PetscSFType` for possible values 168 169 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()` 170 @*/ 171 PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type) 172 { 173 PetscBool match; 174 PetscErrorCode (*r)(PetscSF); 175 176 PetscFunctionBegin; 177 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 178 PetscAssertPointer(type, 2); 179 180 PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match)); 181 if (match) PetscFunctionReturn(PETSC_SUCCESS); 182 183 PetscCall(PetscFunctionListFind(PetscSFList, type, &r)); 184 PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type); 185 /* Destroy the previous PetscSF implementation context */ 186 PetscTryTypeMethod(sf, Destroy); 187 PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops))); 188 PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type)); 189 PetscCall((*r)(sf)); 190 PetscFunctionReturn(PETSC_SUCCESS); 191 } 192 193 /*@ 194 PetscSFGetType - Get the `PetscSF` communication implementation 195 196 Not Collective 197 198 Input Parameter: 199 . sf - the `PetscSF` context 200 201 Output Parameter: 202 . type - the `PetscSF` type name 203 204 Level: intermediate 205 206 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()` 207 @*/ 208 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 209 { 210 PetscFunctionBegin; 211 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 212 PetscAssertPointer(type, 2); 213 *type = ((PetscObject)sf)->type_name; 214 PetscFunctionReturn(PETSC_SUCCESS); 215 } 216 217 /*@ 218 PetscSFDestroy - destroy a star forest 219 220 Collective 221 222 Input Parameter: 223 . sf - address of star forest 224 225 Level: intermediate 226 227 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()` 228 @*/ 229 PetscErrorCode PetscSFDestroy(PetscSF *sf) 230 { 231 PetscFunctionBegin; 232 if (!*sf) PetscFunctionReturn(PETSC_SUCCESS); 233 PetscValidHeaderSpecific(*sf, PETSCSF_CLASSID, 1); 234 if (--((PetscObject)*sf)->refct > 0) { 235 *sf = NULL; 236 PetscFunctionReturn(PETSC_SUCCESS); 237 } 238 PetscCall(PetscSFReset(*sf)); 239 PetscTryTypeMethod(*sf, Destroy); 240 PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf)); 241 if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit)); 242 #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM) 243 if ((*sf)->use_stream_aware_mpi) { 244 PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream)); 245 PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm)); 246 } 247 #endif 248 PetscCall(PetscHeaderDestroy(sf)); 249 PetscFunctionReturn(PETSC_SUCCESS); 250 } 251 252 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 253 { 254 PetscInt i, nleaves; 255 PetscMPIInt size; 256 const PetscInt *ilocal; 257 const PetscSFNode *iremote; 258 259 PetscFunctionBegin; 260 if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS); 261 PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote)); 262 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 263 for (i = 0; i < nleaves; i++) { 264 const PetscInt rank = iremote[i].rank; 265 const PetscInt remote = iremote[i].index; 266 const PetscInt leaf = ilocal ? ilocal[i] : i; 267 PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size); 268 PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i); 269 PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i); 270 } 271 PetscFunctionReturn(PETSC_SUCCESS); 272 } 273 274 /*@ 275 PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication 276 277 Collective 278 279 Input Parameter: 280 . sf - star forest communication object 281 282 Level: beginner 283 284 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()` 285 @*/ 286 PetscErrorCode PetscSFSetUp(PetscSF sf) 287 { 288 PetscFunctionBegin; 289 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 290 PetscSFCheckGraphSet(sf, 1); 291 if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS); 292 PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0)); 293 PetscCall(PetscSFCheckGraphValid_Private(sf)); 294 if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */ 295 PetscTryTypeMethod(sf, SetUp); 296 #if defined(PETSC_HAVE_CUDA) 297 if (sf->backend == PETSCSF_BACKEND_CUDA) { 298 sf->ops->Malloc = PetscSFMalloc_CUDA; 299 sf->ops->Free = PetscSFFree_CUDA; 300 } 301 #endif 302 #if defined(PETSC_HAVE_HIP) 303 if (sf->backend == PETSCSF_BACKEND_HIP) { 304 sf->ops->Malloc = PetscSFMalloc_HIP; 305 sf->ops->Free = PetscSFFree_HIP; 306 } 307 #endif 308 309 #if defined(PETSC_HAVE_KOKKOS) 310 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 311 sf->ops->Malloc = PetscSFMalloc_Kokkos; 312 sf->ops->Free = PetscSFFree_Kokkos; 313 } 314 #endif 315 PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0)); 316 sf->setupcalled = PETSC_TRUE; 317 PetscFunctionReturn(PETSC_SUCCESS); 318 } 319 320 /*@ 321 PetscSFSetFromOptions - set `PetscSF` options using the options database 322 323 Logically Collective 324 325 Input Parameter: 326 . sf - star forest 327 328 Options Database Keys: 329 + -sf_type - implementation type, see `PetscSFSetType()` 330 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 331 . -sf_use_default_stream - Assume callers of `PetscSF` computed the input root/leafdata with the default CUDA stream. `PetscSF` will also 332 use the default stream to process data. Therefore, no stream synchronization is needed between `PetscSF` and its caller (default: true). 333 If true, this option only works with `-use_gpu_aware_mpi 1`. 334 . -sf_use_stream_aware_mpi - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false). 335 If true, this option only works with `-use_gpu_aware_mpi 1`. 336 337 - -sf_backend <cuda,hip,kokkos> - Select the device backend`PetscSF` uses. Currently `PetscSF` has these backends: cuda - hip and Kokkos. 338 On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices, 339 the only available is kokkos. 340 341 Level: intermediate 342 343 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()` 344 @*/ 345 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 346 { 347 PetscSFType deft; 348 char type[256]; 349 PetscBool flg; 350 351 PetscFunctionBegin; 352 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 353 PetscObjectOptionsBegin((PetscObject)sf); 354 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 355 PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg)); 356 PetscCall(PetscSFSetType(sf, flg ? type : deft)); 357 PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL)); 358 PetscCall(PetscOptionsBool("-sf_monitor", "monitor the MPI communication in sf", NULL, sf->monitor, &sf->monitor, NULL)); 359 #if defined(PETSC_HAVE_DEVICE) 360 { 361 char backendstr[32] = {0}; 362 PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set; 363 /* Change the defaults set in PetscSFCreate() with command line options */ 364 PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL)); 365 PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL)); 366 PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set)); 367 PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda)); 368 PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos)); 369 PetscCall(PetscStrcasecmp("hip", backendstr, &isHip)); 370 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 371 if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 372 else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 373 else if (isHip) sf->backend = PETSCSF_BACKEND_HIP; 374 else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr); 375 #elif defined(PETSC_HAVE_KOKKOS) 376 PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr); 377 #endif 378 379 #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM) 380 if (sf->use_stream_aware_mpi) { 381 MPI_Info info; 382 383 PetscCallMPI(MPI_Info_create(&info)); 384 PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t")); 385 PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream))); 386 PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream)); 387 PetscCallMPI(MPI_Info_free(&info)); 388 PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm)); 389 } 390 #endif 391 } 392 #endif 393 PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject); 394 PetscOptionsEnd(); 395 PetscFunctionReturn(PETSC_SUCCESS); 396 } 397 398 /*@ 399 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 400 401 Logically Collective 402 403 Input Parameters: 404 + sf - star forest 405 - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic) 406 407 Level: advanced 408 409 .seealso: `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()` 410 @*/ 411 PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg) 412 { 413 PetscFunctionBegin; 414 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 415 PetscValidLogicalCollectiveBool(sf, flg, 2); 416 PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 417 sf->rankorder = flg; 418 PetscFunctionReturn(PETSC_SUCCESS); 419 } 420 421 /*@ 422 PetscSFSetGraph - Set a parallel star forest 423 424 Collective 425 426 Input Parameters: 427 + sf - star forest 428 . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 429 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 430 . ilocal - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage (locations must be >= 0, enforced 431 during setup in debug mode) 432 . localmode - copy mode for `ilocal` 433 . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 434 during setup in debug mode) 435 - remotemode - copy mode for `iremote` 436 437 Level: intermediate 438 439 Notes: 440 Leaf indices in `ilocal` must be unique, otherwise an error occurs. 441 442 Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics. 443 In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`, 444 PETSc might modify the respective array; 445 if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`. 446 Only if `PETSC_COPY_VALUES` is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed). 447 448 Fortran Notes: 449 In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`. 450 451 Developer Notes: 452 We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf. 453 This also allows to compare leaf sets of two `PetscSF`s easily. 454 455 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()` 456 @*/ 457 PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode) 458 { 459 PetscBool unique, contiguous; 460 461 PetscFunctionBegin; 462 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 463 if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4); 464 if (nleaves > 0) PetscAssertPointer(iremote, 6); 465 PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots); 466 PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves); 467 /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast 468 * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */ 469 PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode); 470 PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode); 471 472 if (sf->nroots >= 0) { /* Reset only if graph already set */ 473 PetscCall(PetscSFReset(sf)); 474 } 475 476 PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0)); 477 if (PetscDefined(USE_DEBUG)) { 478 PetscMPIInt size; 479 480 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 481 for (PetscInt i = 0; i < nleaves; i++) { PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values"); } 482 } 483 484 sf->nroots = nroots; 485 sf->nleaves = nleaves; 486 487 if (localmode == PETSC_COPY_VALUES && ilocal) { 488 PetscInt *tlocal = NULL; 489 490 PetscCall(PetscMalloc1(nleaves, &tlocal)); 491 PetscCall(PetscArraycpy(tlocal, ilocal, nleaves)); 492 ilocal = tlocal; 493 } 494 if (remotemode == PETSC_COPY_VALUES) { 495 PetscSFNode *tremote = NULL; 496 497 PetscCall(PetscMalloc1(nleaves, &tremote)); 498 PetscCall(PetscArraycpy(tremote, iremote, nleaves)); 499 iremote = tremote; 500 } 501 502 if (nleaves && ilocal) { 503 PetscSFNode work; 504 505 PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work)); 506 PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique)); 507 unique = PetscNot(unique); 508 PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF"); 509 sf->minleaf = ilocal[0]; 510 sf->maxleaf = ilocal[nleaves - 1]; 511 contiguous = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1); 512 } else { 513 sf->minleaf = 0; 514 sf->maxleaf = nleaves - 1; 515 unique = PETSC_TRUE; 516 contiguous = PETSC_TRUE; 517 } 518 519 if (contiguous) { 520 if (localmode == PETSC_USE_POINTER) { 521 ilocal = NULL; 522 } else { 523 PetscCall(PetscFree(ilocal)); 524 } 525 } 526 sf->mine = ilocal; 527 if (localmode == PETSC_USE_POINTER) { 528 sf->mine_alloc = NULL; 529 } else { 530 sf->mine_alloc = ilocal; 531 } 532 if (PetscDefined(USE_DEBUG)) { 533 PetscMPIInt size; 534 535 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 536 for (PetscInt i = 0; i < nleaves; i++) { PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values"); } 537 } 538 sf->remote = iremote; 539 if (remotemode == PETSC_USE_POINTER) { 540 sf->remote_alloc = NULL; 541 } else { 542 sf->remote_alloc = iremote; 543 } 544 PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0)); 545 sf->graphset = PETSC_TRUE; 546 PetscFunctionReturn(PETSC_SUCCESS); 547 } 548 549 /*@ 550 PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern 551 552 Collective 553 554 Input Parameters: 555 + sf - The `PetscSF` 556 . map - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`) 557 - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL` 558 559 Level: intermediate 560 561 Notes: 562 It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `x` and its `PetscLayout` is `map`. 563 `n` and `N` are the local and global sizes of `x` respectively. 564 565 With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to 566 sequential vectors `y` on all MPI processes. 567 568 With `PETSCSF_PATTERN_GATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to a 569 sequential vector `y` on rank 0. 570 571 In above cases, entries of `x` are roots and entries of `y` are leaves. 572 573 With `PETSCSF_PATTERN_ALLTOALL`, map is insignificant. Suppose NP is size of `sf`'s communicator. The routine 574 creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 575 of rank j. Here 0 <=i,j<NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that it does 576 not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data 577 items with `MPI_Type_contiguous` and use that as the <unit> argument in SF routines. 578 579 In this case, roots and leaves are symmetric. 580 581 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()` 582 @*/ 583 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern) 584 { 585 MPI_Comm comm; 586 PetscInt n, N, res[2]; 587 PetscMPIInt rank, size; 588 PetscSFType type; 589 590 PetscFunctionBegin; 591 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 592 if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2); 593 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 594 PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern); 595 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 596 PetscCallMPI(MPI_Comm_size(comm, &size)); 597 598 if (pattern == PETSCSF_PATTERN_ALLTOALL) { 599 type = PETSCSFALLTOALL; 600 PetscCall(PetscLayoutCreate(comm, &sf->map)); 601 PetscCall(PetscLayoutSetLocalSize(sf->map, size)); 602 PetscCall(PetscLayoutSetSize(sf->map, (PetscInt)size * size)); 603 PetscCall(PetscLayoutSetUp(sf->map)); 604 } else { 605 PetscCall(PetscLayoutGetLocalSize(map, &n)); 606 PetscCall(PetscLayoutGetSize(map, &N)); 607 res[0] = n; 608 res[1] = -n; 609 /* Check if n are same over all ranks so that we can optimize it */ 610 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm)); 611 if (res[0] == -res[1]) { /* same n */ 612 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 613 } else { 614 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 615 } 616 PetscCall(PetscLayoutReference(map, &sf->map)); 617 } 618 PetscCall(PetscSFSetType(sf, type)); 619 620 sf->pattern = pattern; 621 sf->mine = NULL; /* Contiguous */ 622 623 /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 624 Also set other easy stuff. 625 */ 626 if (pattern == PETSCSF_PATTERN_ALLGATHER) { 627 sf->nleaves = N; 628 sf->nroots = n; 629 sf->nranks = size; 630 sf->minleaf = 0; 631 sf->maxleaf = N - 1; 632 } else if (pattern == PETSCSF_PATTERN_GATHER) { 633 sf->nleaves = rank ? 0 : N; 634 sf->nroots = n; 635 sf->nranks = rank ? 0 : size; 636 sf->minleaf = 0; 637 sf->maxleaf = rank ? -1 : N - 1; 638 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 639 sf->nleaves = size; 640 sf->nroots = size; 641 sf->nranks = size; 642 sf->minleaf = 0; 643 sf->maxleaf = size - 1; 644 } 645 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 646 sf->graphset = PETSC_TRUE; 647 PetscFunctionReturn(PETSC_SUCCESS); 648 } 649 650 /*@ 651 PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map 652 653 Collective 654 655 Input Parameter: 656 . sf - star forest to invert 657 658 Output Parameter: 659 . isf - inverse of `sf` 660 661 Level: advanced 662 663 Notes: 664 All roots must have degree 1. 665 666 The local space may be a permutation, but cannot be sparse. 667 668 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetGraph()` 669 @*/ 670 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf) 671 { 672 PetscMPIInt rank; 673 PetscInt i, nroots, nleaves, maxlocal, count, *newilocal; 674 const PetscInt *ilocal; 675 PetscSFNode *roots, *leaves; 676 677 PetscFunctionBegin; 678 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 679 PetscSFCheckGraphSet(sf, 1); 680 PetscAssertPointer(isf, 2); 681 682 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL)); 683 maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */ 684 685 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 686 PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves)); 687 for (i = 0; i < maxlocal; i++) { 688 leaves[i].rank = rank; 689 leaves[i].index = i; 690 } 691 for (i = 0; i < nroots; i++) { 692 roots[i].rank = -1; 693 roots[i].index = -1; 694 } 695 PetscCall(PetscSFReduceBegin(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE)); 696 PetscCall(PetscSFReduceEnd(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE)); 697 698 /* Check whether our leaves are sparse */ 699 for (i = 0, count = 0; i < nroots; i++) 700 if (roots[i].rank >= 0) count++; 701 if (count == nroots) newilocal = NULL; 702 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal)); 703 for (i = 0, count = 0; i < nroots; i++) { 704 if (roots[i].rank >= 0) { 705 newilocal[count] = i; 706 roots[count].rank = roots[i].rank; 707 roots[count].index = roots[i].index; 708 count++; 709 } 710 } 711 } 712 713 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf)); 714 PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES)); 715 PetscCall(PetscFree2(roots, leaves)); 716 PetscFunctionReturn(PETSC_SUCCESS); 717 } 718 719 /*@ 720 PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph 721 722 Collective 723 724 Input Parameters: 725 + sf - communication object to duplicate 726 - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`) 727 728 Output Parameter: 729 . newsf - new communication object 730 731 Level: beginner 732 733 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()` 734 @*/ 735 PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf) 736 { 737 PetscSFType type; 738 MPI_Datatype dtype = MPIU_SCALAR; 739 740 PetscFunctionBegin; 741 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 742 PetscValidLogicalCollectiveEnum(sf, opt, 2); 743 PetscAssertPointer(newsf, 3); 744 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf)); 745 PetscCall(PetscSFGetType(sf, &type)); 746 if (type) PetscCall(PetscSFSetType(*newsf, type)); 747 (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */ 748 if (opt == PETSCSF_DUPLICATE_GRAPH) { 749 PetscSFCheckGraphSet(sf, 1); 750 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 751 PetscInt nroots, nleaves; 752 const PetscInt *ilocal; 753 const PetscSFNode *iremote; 754 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 755 PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 756 } else { 757 PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern)); 758 } 759 } 760 /* Since oldtype is committed, so is newtype, according to MPI */ 761 if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype)); 762 (*newsf)->vscat.bs = sf->vscat.bs; 763 (*newsf)->vscat.unit = dtype; 764 (*newsf)->vscat.to_n = sf->vscat.to_n; 765 (*newsf)->vscat.from_n = sf->vscat.from_n; 766 /* Do not copy lsf. Build it on demand since it is rarely used */ 767 768 #if defined(PETSC_HAVE_DEVICE) 769 (*newsf)->backend = sf->backend; 770 (*newsf)->unknown_input_stream = sf->unknown_input_stream; 771 (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi; 772 (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi; 773 #endif 774 PetscTryTypeMethod(sf, Duplicate, opt, *newsf); 775 /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */ 776 PetscFunctionReturn(PETSC_SUCCESS); 777 } 778 779 /*@C 780 PetscSFGetGraph - Get the graph specifying a parallel star forest 781 782 Not Collective 783 784 Input Parameter: 785 . sf - star forest 786 787 Output Parameters: 788 + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 789 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 790 . ilocal - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage) 791 - iremote - remote locations of root vertices for each leaf on the current process 792 793 Level: intermediate 794 795 Notes: 796 We are not currently requiring that the graph is set, thus returning `nroots` = -1 if it has not been set yet 797 798 The returned `ilocal` and `iremote` might contain values in different order than the input ones in `PetscSFSetGraph()` 799 800 Fortran Notes: 801 The returned `iremote` array is a copy and must be deallocated after use. Consequently, if you 802 want to update the graph, you must call `PetscSFSetGraph()` after modifying the `iremote` array. 803 804 To check for a `NULL` `ilocal` use 805 $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then 806 807 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()` 808 @*/ 809 PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote) 810 { 811 PetscFunctionBegin; 812 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 813 if (sf->ops->GetGraph) { 814 PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote)); 815 } else { 816 if (nroots) *nroots = sf->nroots; 817 if (nleaves) *nleaves = sf->nleaves; 818 if (ilocal) *ilocal = sf->mine; 819 if (iremote) *iremote = sf->remote; 820 } 821 PetscFunctionReturn(PETSC_SUCCESS); 822 } 823 824 /*@ 825 PetscSFGetLeafRange - Get the active leaf ranges 826 827 Not Collective 828 829 Input Parameter: 830 . sf - star forest 831 832 Output Parameters: 833 + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves. 834 - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves. 835 836 Level: developer 837 838 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 839 @*/ 840 PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf) 841 { 842 PetscFunctionBegin; 843 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 844 PetscSFCheckGraphSet(sf, 1); 845 if (minleaf) *minleaf = sf->minleaf; 846 if (maxleaf) *maxleaf = sf->maxleaf; 847 PetscFunctionReturn(PETSC_SUCCESS); 848 } 849 850 /*@ 851 PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database 852 853 Collective 854 855 Input Parameters: 856 + A - the star forest 857 . obj - Optional object that provides the prefix for the option names 858 - name - command line option 859 860 Level: intermediate 861 862 Note: 863 See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat` 864 865 .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()` 866 @*/ 867 PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[]) 868 { 869 PetscFunctionBegin; 870 PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1); 871 PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name)); 872 PetscFunctionReturn(PETSC_SUCCESS); 873 } 874 875 /*@ 876 PetscSFView - view a star forest 877 878 Collective 879 880 Input Parameters: 881 + sf - star forest 882 - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD` 883 884 Level: beginner 885 886 .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()` 887 @*/ 888 PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer) 889 { 890 PetscBool iascii; 891 PetscViewerFormat format; 892 893 PetscFunctionBegin; 894 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 895 if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer)); 896 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 897 PetscCheckSameComm(sf, 1, viewer, 2); 898 if (sf->graphset) PetscCall(PetscSFSetUp(sf)); 899 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 900 if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) { 901 PetscMPIInt rank; 902 PetscInt j; 903 904 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer)); 905 PetscCall(PetscViewerASCIIPushTab(viewer)); 906 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 907 if (!sf->graphset) { 908 PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n")); 909 PetscCall(PetscViewerASCIIPopTab(viewer)); 910 PetscFunctionReturn(PETSC_SUCCESS); 911 } 912 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 913 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 914 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%d\n", rank, sf->nroots, sf->nleaves, sf->nranks)); 915 for (PetscInt i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%d,%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, (PetscMPIInt)sf->remote[i].rank, sf->remote[i].index)); 916 PetscCall(PetscViewerFlush(viewer)); 917 PetscCall(PetscViewerGetFormat(viewer, &format)); 918 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 919 PetscMPIInt *tmpranks, *perm; 920 921 PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm)); 922 PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks)); 923 for (PetscMPIInt i = 0; i < sf->nranks; i++) perm[i] = i; 924 PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm)); 925 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank)); 926 for (PetscMPIInt ii = 0; ii < sf->nranks; ii++) { 927 PetscMPIInt i = perm[ii]; 928 929 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i])); 930 for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j])); 931 } 932 PetscCall(PetscFree2(tmpranks, perm)); 933 } 934 PetscCall(PetscViewerFlush(viewer)); 935 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 936 } 937 PetscCall(PetscViewerASCIIPopTab(viewer)); 938 } 939 PetscTryTypeMethod(sf, View, viewer); 940 PetscFunctionReturn(PETSC_SUCCESS); 941 } 942 943 /*@C 944 PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process 945 946 Not Collective 947 948 Input Parameter: 949 . sf - star forest 950 951 Output Parameters: 952 + nranks - number of ranks referenced by local part 953 . ranks - [`nranks`] array of ranks 954 . roffset - [`nranks`+1] offset in `rmine`/`rremote` for each rank 955 . rmine - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote rank, or `NULL` 956 - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote rank, or `NULL` 957 958 Level: developer 959 960 .seealso: `PetscSF`, `PetscSFGetLeafRanks()` 961 @*/ 962 PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscMPIInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote) 963 { 964 PetscFunctionBegin; 965 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 966 PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 967 if (sf->ops->GetRootRanks) { 968 PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote); 969 } else { 970 /* The generic implementation */ 971 if (nranks) *nranks = sf->nranks; 972 if (ranks) *ranks = sf->ranks; 973 if (roffset) *roffset = sf->roffset; 974 if (rmine) *rmine = sf->rmine; 975 if (rremote) *rremote = sf->rremote; 976 } 977 PetscFunctionReturn(PETSC_SUCCESS); 978 } 979 980 /*@C 981 PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 982 983 Not Collective 984 985 Input Parameter: 986 . sf - star forest 987 988 Output Parameters: 989 + niranks - number of leaf ranks referencing roots on this process 990 . iranks - [`niranks`] array of ranks 991 . ioffset - [`niranks`+1] offset in `irootloc` for each rank 992 - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf rank 993 994 Level: developer 995 996 .seealso: `PetscSF`, `PetscSFGetRootRanks()` 997 @*/ 998 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscMPIInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc) 999 { 1000 PetscFunctionBegin; 1001 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1002 PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 1003 if (sf->ops->GetLeafRanks) { 1004 PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc); 1005 } else { 1006 PetscSFType type; 1007 PetscCall(PetscSFGetType(sf, &type)); 1008 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 1009 } 1010 PetscFunctionReturn(PETSC_SUCCESS); 1011 } 1012 1013 static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list) 1014 { 1015 PetscInt i; 1016 for (i = 0; i < n; i++) { 1017 if (needle == list[i]) return PETSC_TRUE; 1018 } 1019 return PETSC_FALSE; 1020 } 1021 1022 /*@C 1023 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations. 1024 1025 Collective 1026 1027 Input Parameters: 1028 + sf - `PetscSF` to set up; `PetscSFSetGraph()` must have been called 1029 - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange) 1030 1031 Level: developer 1032 1033 .seealso: `PetscSF`, `PetscSFGetRootRanks()` 1034 @*/ 1035 PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup) 1036 { 1037 PetscHMapI table; 1038 PetscHashIter pos; 1039 PetscMPIInt size, groupsize, *groupranks, *ranks; 1040 PetscInt *rcount; 1041 PetscInt irank, sfnrank, ranksi; 1042 PetscMPIInt i, orank = -1; 1043 1044 PetscFunctionBegin; 1045 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1046 PetscSFCheckGraphSet(sf, 1); 1047 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 1048 PetscCall(PetscHMapICreateWithSize(10, &table)); 1049 for (i = 0; i < sf->nleaves; i++) { 1050 /* Log 1-based rank */ 1051 PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES)); 1052 } 1053 PetscCall(PetscHMapIGetSize(table, &sfnrank)); 1054 PetscCall(PetscMPIIntCast(sfnrank, &sf->nranks)); 1055 PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote)); 1056 PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks)); 1057 PetscHashIterBegin(table, pos); 1058 for (i = 0; i < sf->nranks; i++) { 1059 PetscHashIterGetKey(table, pos, ranksi); 1060 PetscCall(PetscMPIIntCast(ranksi, &ranks[i])); 1061 PetscHashIterGetVal(table, pos, rcount[i]); 1062 PetscHashIterNext(table, pos); 1063 ranks[i]--; /* Convert back to 0-based */ 1064 } 1065 PetscCall(PetscHMapIDestroy(&table)); 1066 1067 /* We expect that dgroup is reliably "small" while nranks could be large */ 1068 { 1069 MPI_Group group = MPI_GROUP_NULL; 1070 PetscMPIInt *dgroupranks; 1071 1072 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 1073 PetscCallMPI(MPI_Group_size(dgroup, &groupsize)); 1074 PetscCall(PetscMalloc1(groupsize, &dgroupranks)); 1075 PetscCall(PetscMalloc1(groupsize, &groupranks)); 1076 for (i = 0; i < groupsize; i++) dgroupranks[i] = i; 1077 if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks)); 1078 PetscCallMPI(MPI_Group_free(&group)); 1079 PetscCall(PetscFree(dgroupranks)); 1080 } 1081 1082 /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */ 1083 for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) { 1084 for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */ 1085 if (InList(ranks[i], groupsize, groupranks)) break; 1086 } 1087 for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */ 1088 if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break; 1089 } 1090 if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */ 1091 PetscMPIInt tmprank; 1092 PetscInt tmpcount; 1093 1094 tmprank = ranks[i]; 1095 tmpcount = rcount[i]; 1096 ranks[i] = ranks[sf->ndranks]; 1097 rcount[i] = rcount[sf->ndranks]; 1098 ranks[sf->ndranks] = tmprank; 1099 rcount[sf->ndranks] = tmpcount; 1100 sf->ndranks++; 1101 } 1102 } 1103 PetscCall(PetscFree(groupranks)); 1104 PetscCall(PetscSortMPIIntWithIntArray(sf->ndranks, ranks, rcount)); 1105 if (rcount) PetscCall(PetscSortMPIIntWithIntArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks)); 1106 sf->roffset[0] = 0; 1107 for (i = 0; i < sf->nranks; i++) { 1108 PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i)); 1109 sf->roffset[i + 1] = sf->roffset[i] + rcount[i]; 1110 rcount[i] = 0; 1111 } 1112 for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) { 1113 /* short circuit */ 1114 if (orank != sf->remote[i].rank) { 1115 /* Search for index of iremote[i].rank in sf->ranks */ 1116 PetscCall(PetscFindMPIInt((PetscMPIInt)sf->remote[i].rank, sf->ndranks, sf->ranks, &irank)); 1117 if (irank < 0) { 1118 PetscCall(PetscFindMPIInt((PetscMPIInt)sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank)); 1119 if (irank >= 0) irank += sf->ndranks; 1120 } 1121 orank = (PetscMPIInt)sf->remote[i].rank; 1122 } 1123 PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %d in array", (PetscMPIInt)sf->remote[i].rank); 1124 sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i; 1125 sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index; 1126 rcount[irank]++; 1127 } 1128 PetscCall(PetscFree2(rcount, ranks)); 1129 PetscFunctionReturn(PETSC_SUCCESS); 1130 } 1131 1132 /*@C 1133 PetscSFGetGroups - gets incoming and outgoing process groups 1134 1135 Collective 1136 1137 Input Parameter: 1138 . sf - star forest 1139 1140 Output Parameters: 1141 + incoming - group of origin processes for incoming edges (leaves that reference my roots) 1142 - outgoing - group of destination processes for outgoing edges (roots that I reference) 1143 1144 Level: developer 1145 1146 .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()` 1147 @*/ 1148 PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing) 1149 { 1150 MPI_Group group = MPI_GROUP_NULL; 1151 1152 PetscFunctionBegin; 1153 PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups"); 1154 if (sf->ingroup == MPI_GROUP_NULL) { 1155 PetscInt i; 1156 const PetscInt *indegree; 1157 PetscMPIInt rank, *outranks, *inranks, indegree0; 1158 PetscSFNode *remote; 1159 PetscSF bgcount; 1160 1161 /* Compute the number of incoming ranks */ 1162 PetscCall(PetscMalloc1(sf->nranks, &remote)); 1163 for (i = 0; i < sf->nranks; i++) { 1164 remote[i].rank = sf->ranks[i]; 1165 remote[i].index = 0; 1166 } 1167 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount)); 1168 PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER)); 1169 PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree)); 1170 PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree)); 1171 /* Enumerate the incoming ranks */ 1172 PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks)); 1173 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 1174 for (i = 0; i < sf->nranks; i++) outranks[i] = rank; 1175 PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks)); 1176 PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks)); 1177 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 1178 PetscCall(PetscMPIIntCast(indegree[0], &indegree0)); 1179 PetscCallMPI(MPI_Group_incl(group, indegree0, inranks, &sf->ingroup)); 1180 PetscCallMPI(MPI_Group_free(&group)); 1181 PetscCall(PetscFree2(inranks, outranks)); 1182 PetscCall(PetscSFDestroy(&bgcount)); 1183 } 1184 *incoming = sf->ingroup; 1185 1186 if (sf->outgroup == MPI_GROUP_NULL) { 1187 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 1188 PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup)); 1189 PetscCallMPI(MPI_Group_free(&group)); 1190 } 1191 *outgoing = sf->outgroup; 1192 PetscFunctionReturn(PETSC_SUCCESS); 1193 } 1194 1195 /*@ 1196 PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks 1197 1198 Collective 1199 1200 Input Parameter: 1201 . sf - star forest 1202 1203 Output Parameter: 1204 . rsf - the star forest with a single root per process to perform communications 1205 1206 Level: developer 1207 1208 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()` 1209 @*/ 1210 PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf) 1211 { 1212 PetscFunctionBegin; 1213 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1214 PetscAssertPointer(rsf, 2); 1215 if (!sf->rankssf) { 1216 PetscSFNode *rremotes; 1217 const PetscMPIInt *ranks; 1218 PetscMPIInt nranks; 1219 1220 PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL)); 1221 PetscCall(PetscMalloc1(nranks, &rremotes)); 1222 for (PetscInt i = 0; i < nranks; i++) { 1223 rremotes[i].rank = ranks[i]; 1224 rremotes[i].index = 0; 1225 } 1226 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf)); 1227 PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER)); 1228 } 1229 *rsf = sf->rankssf; 1230 PetscFunctionReturn(PETSC_SUCCESS); 1231 } 1232 1233 /*@ 1234 PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters 1235 1236 Collective 1237 1238 Input Parameter: 1239 . sf - star forest that may contain roots with 0 or with more than 1 vertex 1240 1241 Output Parameter: 1242 . multi - star forest with split roots, such that each root has degree exactly 1 1243 1244 Level: developer 1245 1246 Note: 1247 In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi 1248 directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming 1249 edge, it is a candidate for future optimization that might involve its removal. 1250 1251 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()` 1252 @*/ 1253 PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi) 1254 { 1255 PetscFunctionBegin; 1256 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1257 PetscAssertPointer(multi, 2); 1258 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */ 1259 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi)); 1260 *multi = sf->multi; 1261 sf->multi->multi = sf->multi; 1262 PetscFunctionReturn(PETSC_SUCCESS); 1263 } 1264 if (!sf->multi) { 1265 const PetscInt *indegree; 1266 PetscInt i, *inoffset, *outones, *outoffset, maxlocal; 1267 PetscSFNode *remote; 1268 maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */ 1269 PetscCall(PetscSFComputeDegreeBegin(sf, &indegree)); 1270 PetscCall(PetscSFComputeDegreeEnd(sf, &indegree)); 1271 PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset)); 1272 inoffset[0] = 0; 1273 for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i]; 1274 for (i = 0; i < maxlocal; i++) outones[i] = 1; 1275 PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM)); 1276 PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM)); 1277 for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1278 if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */ 1279 for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp"); 1280 } 1281 PetscCall(PetscMalloc1(sf->nleaves, &remote)); 1282 for (i = 0; i < sf->nleaves; i++) { 1283 remote[i].rank = sf->remote[i].rank; 1284 remote[i].index = outoffset[sf->mine ? sf->mine[i] : i]; 1285 } 1286 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi)); 1287 sf->multi->multi = sf->multi; 1288 PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER)); 1289 if (sf->rankorder) { /* Sort the ranks */ 1290 PetscMPIInt rank; 1291 PetscInt *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree; 1292 PetscSFNode *newremote; 1293 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 1294 for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]); 1295 PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset)); 1296 for (i = 0; i < maxlocal; i++) outranks[i] = rank; 1297 PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE)); 1298 PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE)); 1299 /* Sort the incoming ranks at each vertex, build the inverse map */ 1300 for (i = 0; i < sf->nroots; i++) { 1301 PetscInt j; 1302 for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j; 1303 PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset)); 1304 for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1305 } 1306 PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE)); 1307 PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE)); 1308 PetscCall(PetscMalloc1(sf->nleaves, &newremote)); 1309 for (i = 0; i < sf->nleaves; i++) { 1310 newremote[i].rank = sf->remote[i].rank; 1311 newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i]; 1312 } 1313 PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER)); 1314 PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset)); 1315 } 1316 PetscCall(PetscFree3(inoffset, outones, outoffset)); 1317 } 1318 *multi = sf->multi; 1319 PetscFunctionReturn(PETSC_SUCCESS); 1320 } 1321 1322 /*@C 1323 PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices 1324 1325 Collective 1326 1327 Input Parameters: 1328 + sf - original star forest 1329 . nselected - number of selected roots on this process 1330 - selected - indices of the selected roots on this process 1331 1332 Output Parameter: 1333 . esf - new star forest 1334 1335 Level: advanced 1336 1337 Note: 1338 To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can 1339 be done by calling PetscSFGetGraph(). 1340 1341 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1342 @*/ 1343 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf) 1344 { 1345 PetscInt i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal; 1346 const PetscInt *ilocal; 1347 signed char *rootdata, *leafdata, *leafmem; 1348 const PetscSFNode *iremote; 1349 PetscSFNode *new_iremote; 1350 MPI_Comm comm; 1351 1352 PetscFunctionBegin; 1353 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1354 PetscSFCheckGraphSet(sf, 1); 1355 if (nselected) PetscAssertPointer(selected, 3); 1356 PetscAssertPointer(esf, 4); 1357 1358 PetscCall(PetscSFSetUp(sf)); 1359 PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0)); 1360 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1361 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 1362 1363 if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */ 1364 PetscBool dups; 1365 PetscCall(PetscCheckDupsInt(nselected, selected, &dups)); 1366 PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups"); 1367 for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots); 1368 } 1369 1370 if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf); 1371 else { 1372 /* A generic version of creating embedded sf */ 1373 PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf)); 1374 maxlocal = maxleaf - minleaf + 1; 1375 PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem)); 1376 leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf); 1377 /* Tag selected roots and bcast to leaves */ 1378 for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1; 1379 PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1380 PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1381 1382 /* Build esf with leaves that are still connected */ 1383 esf_nleaves = 0; 1384 for (i = 0; i < nleaves; i++) { 1385 j = ilocal ? ilocal[i] : i; 1386 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1387 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1388 */ 1389 esf_nleaves += (leafdata[j] ? 1 : 0); 1390 } 1391 PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal)); 1392 PetscCall(PetscMalloc1(esf_nleaves, &new_iremote)); 1393 for (i = n = 0; i < nleaves; i++) { 1394 j = ilocal ? ilocal[i] : i; 1395 if (leafdata[j]) { 1396 new_ilocal[n] = j; 1397 new_iremote[n].rank = iremote[i].rank; 1398 new_iremote[n].index = iremote[i].index; 1399 ++n; 1400 } 1401 } 1402 PetscCall(PetscSFCreate(comm, esf)); 1403 PetscCall(PetscSFSetFromOptions(*esf)); 1404 PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1405 PetscCall(PetscFree2(rootdata, leafmem)); 1406 } 1407 PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0)); 1408 PetscFunctionReturn(PETSC_SUCCESS); 1409 } 1410 1411 /*@C 1412 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices 1413 1414 Collective 1415 1416 Input Parameters: 1417 + sf - original star forest 1418 . nselected - number of selected leaves on this process 1419 - selected - indices of the selected leaves on this process 1420 1421 Output Parameter: 1422 . newsf - new star forest 1423 1424 Level: advanced 1425 1426 .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1427 @*/ 1428 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf) 1429 { 1430 const PetscSFNode *iremote; 1431 PetscSFNode *new_iremote; 1432 const PetscInt *ilocal; 1433 PetscInt i, nroots, *leaves, *new_ilocal; 1434 MPI_Comm comm; 1435 1436 PetscFunctionBegin; 1437 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1438 PetscSFCheckGraphSet(sf, 1); 1439 if (nselected) PetscAssertPointer(selected, 3); 1440 PetscAssertPointer(newsf, 4); 1441 1442 /* Uniq selected[] and put results in leaves[] */ 1443 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1444 PetscCall(PetscMalloc1(nselected, &leaves)); 1445 PetscCall(PetscArraycpy(leaves, selected, nselected)); 1446 PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves)); 1447 PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves); 1448 1449 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1450 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf); 1451 else { 1452 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote)); 1453 PetscCall(PetscMalloc1(nselected, &new_ilocal)); 1454 PetscCall(PetscMalloc1(nselected, &new_iremote)); 1455 for (i = 0; i < nselected; ++i) { 1456 const PetscInt l = leaves[i]; 1457 new_ilocal[i] = ilocal ? ilocal[l] : l; 1458 new_iremote[i].rank = iremote[l].rank; 1459 new_iremote[i].index = iremote[l].index; 1460 } 1461 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf)); 1462 PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1463 } 1464 PetscCall(PetscFree(leaves)); 1465 PetscFunctionReturn(PETSC_SUCCESS); 1466 } 1467 1468 /*@C 1469 PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()` 1470 1471 Collective 1472 1473 Input Parameters: 1474 + sf - star forest on which to communicate 1475 . unit - data type associated with each node 1476 . rootdata - buffer to broadcast 1477 - op - operation to use for reduction 1478 1479 Output Parameter: 1480 . leafdata - buffer to be reduced with values from each leaf's respective root 1481 1482 Level: intermediate 1483 1484 Note: 1485 When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1486 are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should 1487 use `PetscSFBcastWithMemTypeBegin()` instead. 1488 1489 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()` 1490 @*/ 1491 PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1492 { 1493 PetscMemType rootmtype, leafmtype; 1494 1495 PetscFunctionBegin; 1496 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1497 PetscCall(PetscSFSetUp(sf)); 1498 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1499 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1500 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1501 PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 1502 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1503 PetscFunctionReturn(PETSC_SUCCESS); 1504 } 1505 1506 /*@C 1507 PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call 1508 to `PetscSFBcastEnd()` 1509 1510 Collective 1511 1512 Input Parameters: 1513 + sf - star forest on which to communicate 1514 . unit - data type associated with each node 1515 . rootmtype - memory type of rootdata 1516 . rootdata - buffer to broadcast 1517 . leafmtype - memory type of leafdata 1518 - op - operation to use for reduction 1519 1520 Output Parameter: 1521 . leafdata - buffer to be reduced with values from each leaf's respective root 1522 1523 Level: intermediate 1524 1525 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()` 1526 @*/ 1527 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) 1528 { 1529 PetscFunctionBegin; 1530 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1531 PetscCall(PetscSFSetUp(sf)); 1532 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1533 PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 1534 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1535 PetscFunctionReturn(PETSC_SUCCESS); 1536 } 1537 1538 /*@C 1539 PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()` 1540 1541 Collective 1542 1543 Input Parameters: 1544 + sf - star forest 1545 . unit - data type 1546 . rootdata - buffer to broadcast 1547 - op - operation to use for reduction 1548 1549 Output Parameter: 1550 . leafdata - buffer to be reduced with values from each leaf's respective root 1551 1552 Level: intermediate 1553 1554 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()` 1555 @*/ 1556 PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1557 { 1558 PetscFunctionBegin; 1559 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1560 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1561 PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op); 1562 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1563 PetscFunctionReturn(PETSC_SUCCESS); 1564 } 1565 1566 /*@C 1567 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()` 1568 1569 Collective 1570 1571 Input Parameters: 1572 + sf - star forest 1573 . unit - data type 1574 . leafdata - values to reduce 1575 - op - reduction operation 1576 1577 Output Parameter: 1578 . rootdata - result of reduction of values from all leaves of each root 1579 1580 Level: intermediate 1581 1582 Note: 1583 When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1584 are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should 1585 use `PetscSFReduceWithMemTypeBegin()` instead. 1586 1587 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()` 1588 @*/ 1589 PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1590 { 1591 PetscMemType rootmtype, leafmtype; 1592 1593 PetscFunctionBegin; 1594 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1595 PetscCall(PetscSFSetUp(sf)); 1596 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1597 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1598 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1599 PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1600 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1601 PetscFunctionReturn(PETSC_SUCCESS); 1602 } 1603 1604 /*@C 1605 PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()` 1606 1607 Collective 1608 1609 Input Parameters: 1610 + sf - star forest 1611 . unit - data type 1612 . leafmtype - memory type of leafdata 1613 . leafdata - values to reduce 1614 . rootmtype - memory type of rootdata 1615 - op - reduction operation 1616 1617 Output Parameter: 1618 . rootdata - result of reduction of values from all leaves of each root 1619 1620 Level: intermediate 1621 1622 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()` 1623 @*/ 1624 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) 1625 { 1626 PetscFunctionBegin; 1627 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1628 PetscCall(PetscSFSetUp(sf)); 1629 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1630 PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1631 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1632 PetscFunctionReturn(PETSC_SUCCESS); 1633 } 1634 1635 /*@C 1636 PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()` 1637 1638 Collective 1639 1640 Input Parameters: 1641 + sf - star forest 1642 . unit - data type 1643 . leafdata - values to reduce 1644 - op - reduction operation 1645 1646 Output Parameter: 1647 . rootdata - result of reduction of values from all leaves of each root 1648 1649 Level: intermediate 1650 1651 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()` 1652 @*/ 1653 PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1654 { 1655 PetscFunctionBegin; 1656 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1657 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1658 PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op); 1659 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1660 PetscFunctionReturn(PETSC_SUCCESS); 1661 } 1662 1663 /*@C 1664 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, 1665 to be completed with `PetscSFFetchAndOpEnd()` 1666 1667 Collective 1668 1669 Input Parameters: 1670 + sf - star forest 1671 . unit - data type 1672 . leafdata - leaf values to use in reduction 1673 - op - operation to use for reduction 1674 1675 Output Parameters: 1676 + rootdata - root values to be updated, input state is seen by first process to perform an update 1677 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1678 1679 Level: advanced 1680 1681 Note: 1682 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1683 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1684 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1685 integers. 1686 1687 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()` 1688 @*/ 1689 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1690 { 1691 PetscMemType rootmtype, leafmtype, leafupdatemtype; 1692 1693 PetscFunctionBegin; 1694 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1695 PetscCall(PetscSFSetUp(sf)); 1696 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1697 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1698 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1699 PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype)); 1700 PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1701 PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 1702 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1703 PetscFunctionReturn(PETSC_SUCCESS); 1704 } 1705 1706 /*@C 1707 PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by 1708 applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()` 1709 1710 Collective 1711 1712 Input Parameters: 1713 + sf - star forest 1714 . unit - data type 1715 . rootmtype - memory type of rootdata 1716 . leafmtype - memory type of leafdata 1717 . leafdata - leaf values to use in reduction 1718 . leafupdatemtype - memory type of leafupdate 1719 - op - operation to use for reduction 1720 1721 Output Parameters: 1722 + rootdata - root values to be updated, input state is seen by first process to perform an update 1723 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1724 1725 Level: advanced 1726 1727 Note: 1728 See `PetscSFFetchAndOpBegin()` for more details. 1729 1730 .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()` 1731 @*/ 1732 PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op) 1733 { 1734 PetscFunctionBegin; 1735 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1736 PetscCall(PetscSFSetUp(sf)); 1737 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1738 PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1739 PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 1740 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1741 PetscFunctionReturn(PETSC_SUCCESS); 1742 } 1743 1744 /*@C 1745 PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()` 1746 to fetch values from roots and update atomically by applying operation using my leaf value 1747 1748 Collective 1749 1750 Input Parameters: 1751 + sf - star forest 1752 . unit - data type 1753 . leafdata - leaf values to use in reduction 1754 - op - operation to use for reduction 1755 1756 Output Parameters: 1757 + rootdata - root values to be updated, input state is seen by first process to perform an update 1758 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1759 1760 Level: advanced 1761 1762 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()` 1763 @*/ 1764 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1765 { 1766 PetscFunctionBegin; 1767 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1768 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0)); 1769 PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op); 1770 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0)); 1771 PetscFunctionReturn(PETSC_SUCCESS); 1772 } 1773 1774 /*@C 1775 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()` 1776 1777 Collective 1778 1779 Input Parameter: 1780 . sf - star forest 1781 1782 Output Parameter: 1783 . degree - degree of each root vertex 1784 1785 Level: advanced 1786 1787 Note: 1788 The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it. 1789 1790 .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()` 1791 @*/ 1792 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt *degree[]) 1793 { 1794 PetscFunctionBegin; 1795 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1796 PetscSFCheckGraphSet(sf, 1); 1797 PetscAssertPointer(degree, 2); 1798 if (!sf->degreeknown) { 1799 PetscInt i, nroots = sf->nroots, maxlocal; 1800 PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1801 maxlocal = sf->maxleaf - sf->minleaf + 1; 1802 PetscCall(PetscMalloc1(nroots, &sf->degree)); 1803 PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1804 for (i = 0; i < nroots; i++) sf->degree[i] = 0; 1805 for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1; 1806 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 1807 } 1808 *degree = NULL; 1809 PetscFunctionReturn(PETSC_SUCCESS); 1810 } 1811 1812 /*@C 1813 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()` 1814 1815 Collective 1816 1817 Input Parameter: 1818 . sf - star forest 1819 1820 Output Parameter: 1821 . degree - degree of each root vertex 1822 1823 Level: developer 1824 1825 Note: 1826 The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it. 1827 1828 .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()` 1829 @*/ 1830 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree) 1831 { 1832 PetscFunctionBegin; 1833 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1834 PetscSFCheckGraphSet(sf, 1); 1835 PetscAssertPointer(degree, 2); 1836 if (!sf->degreeknown) { 1837 PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 1838 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 1839 PetscCall(PetscFree(sf->degreetmp)); 1840 sf->degreeknown = PETSC_TRUE; 1841 } 1842 *degree = sf->degree; 1843 PetscFunctionReturn(PETSC_SUCCESS); 1844 } 1845 1846 /*@C 1847 PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`). 1848 Each multi-root is assigned index of the corresponding original root. 1849 1850 Collective 1851 1852 Input Parameters: 1853 + sf - star forest 1854 - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()` 1855 1856 Output Parameters: 1857 + nMultiRoots - (optional) number of multi-roots (roots of multi-`PetscSF`) 1858 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots` 1859 1860 Level: developer 1861 1862 Note: 1863 The returned array `multiRootsOrigNumbering` is newly allocated and should be destroyed with `PetscFree()` when no longer needed. 1864 1865 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()` 1866 @*/ 1867 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1868 { 1869 PetscSF msf; 1870 PetscInt k = 0, nroots, nmroots; 1871 1872 PetscFunctionBegin; 1873 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1874 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL)); 1875 if (nroots) PetscAssertPointer(degree, 2); 1876 if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3); 1877 PetscAssertPointer(multiRootsOrigNumbering, 4); 1878 PetscCall(PetscSFGetMultiSF(sf, &msf)); 1879 PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL)); 1880 PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering)); 1881 for (PetscInt i = 0; i < nroots; i++) { 1882 if (!degree[i]) continue; 1883 for (PetscInt j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i; 1884 } 1885 PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail"); 1886 if (nMultiRoots) *nMultiRoots = nmroots; 1887 PetscFunctionReturn(PETSC_SUCCESS); 1888 } 1889 1890 /*@C 1891 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()` 1892 1893 Collective 1894 1895 Input Parameters: 1896 + sf - star forest 1897 . unit - data type 1898 - leafdata - leaf data to gather to roots 1899 1900 Output Parameter: 1901 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1902 1903 Level: intermediate 1904 1905 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()` 1906 @*/ 1907 PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) 1908 { 1909 PetscSF multi = NULL; 1910 1911 PetscFunctionBegin; 1912 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1913 PetscCall(PetscSFSetUp(sf)); 1914 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1915 PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 1916 PetscFunctionReturn(PETSC_SUCCESS); 1917 } 1918 1919 /*@C 1920 PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()` 1921 1922 Collective 1923 1924 Input Parameters: 1925 + sf - star forest 1926 . unit - data type 1927 - leafdata - leaf data to gather to roots 1928 1929 Output Parameter: 1930 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1931 1932 Level: intermediate 1933 1934 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()` 1935 @*/ 1936 PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) 1937 { 1938 PetscSF multi = NULL; 1939 1940 PetscFunctionBegin; 1941 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1942 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1943 PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 1944 PetscFunctionReturn(PETSC_SUCCESS); 1945 } 1946 1947 /*@C 1948 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()` 1949 1950 Collective 1951 1952 Input Parameters: 1953 + sf - star forest 1954 . unit - data type 1955 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1956 1957 Output Parameter: 1958 . leafdata - leaf data to be update with personal data from each respective root 1959 1960 Level: intermediate 1961 1962 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()` 1963 @*/ 1964 PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) 1965 { 1966 PetscSF multi = NULL; 1967 1968 PetscFunctionBegin; 1969 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1970 PetscCall(PetscSFSetUp(sf)); 1971 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1972 PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE)); 1973 PetscFunctionReturn(PETSC_SUCCESS); 1974 } 1975 1976 /*@C 1977 PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()` 1978 1979 Collective 1980 1981 Input Parameters: 1982 + sf - star forest 1983 . unit - data type 1984 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1985 1986 Output Parameter: 1987 . leafdata - leaf data to be update with personal data from each respective root 1988 1989 Level: intermediate 1990 1991 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()` 1992 @*/ 1993 PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) 1994 { 1995 PetscSF multi = NULL; 1996 1997 PetscFunctionBegin; 1998 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1999 PetscCall(PetscSFGetMultiSF(sf, &multi)); 2000 PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE)); 2001 PetscFunctionReturn(PETSC_SUCCESS); 2002 } 2003 2004 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 2005 { 2006 PetscInt i, n, nleaves; 2007 const PetscInt *ilocal = NULL; 2008 PetscHSetI seen; 2009 2010 PetscFunctionBegin; 2011 if (PetscDefined(USE_DEBUG)) { 2012 PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL)); 2013 PetscCall(PetscHSetICreate(&seen)); 2014 for (i = 0; i < nleaves; i++) { 2015 const PetscInt leaf = ilocal ? ilocal[i] : i; 2016 PetscCall(PetscHSetIAdd(seen, leaf)); 2017 } 2018 PetscCall(PetscHSetIGetSize(seen, &n)); 2019 PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique"); 2020 PetscCall(PetscHSetIDestroy(&seen)); 2021 } 2022 PetscFunctionReturn(PETSC_SUCCESS); 2023 } 2024 2025 /*@ 2026 PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view 2027 2028 Input Parameters: 2029 + sfA - The first `PetscSF` 2030 - sfB - The second `PetscSF` 2031 2032 Output Parameter: 2033 . sfBA - The composite `PetscSF` 2034 2035 Level: developer 2036 2037 Notes: 2038 Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star 2039 forests, i.e. the same leaf is not connected with different roots. 2040 2041 `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds 2042 a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected 2043 nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a 2044 `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfB`, on connected nodes. 2045 2046 .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()` 2047 @*/ 2048 PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA) 2049 { 2050 const PetscSFNode *remotePointsA, *remotePointsB; 2051 PetscSFNode *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB; 2052 const PetscInt *localPointsA, *localPointsB; 2053 PetscInt *localPointsBA; 2054 PetscInt i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA; 2055 PetscBool denseB; 2056 2057 PetscFunctionBegin; 2058 PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1); 2059 PetscSFCheckGraphSet(sfA, 1); 2060 PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2); 2061 PetscSFCheckGraphSet(sfB, 2); 2062 PetscCheckSameComm(sfA, 1, sfB, 2); 2063 PetscAssertPointer(sfBA, 3); 2064 PetscCall(PetscSFCheckLeavesUnique_Private(sfA)); 2065 PetscCall(PetscSFCheckLeavesUnique_Private(sfB)); 2066 2067 PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA)); 2068 PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB)); 2069 /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size 2070 numRootsB; otherwise, garbage will be broadcasted. 2071 Example (comm size = 1): 2072 sfA: 0 <- (0, 0) 2073 sfB: 100 <- (0, 0) 2074 101 <- (0, 1) 2075 Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid tartget 2076 of sfB, it has to be recasted as [(0, 0), (-1, -1)] so that points 100 and 101 would 2077 receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on 2078 remotePointsA; if not recasted, point 101 would receive a garbage value. */ 2079 PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA)); 2080 for (i = 0; i < numRootsB; i++) { 2081 reorderedRemotePointsA[i].rank = -1; 2082 reorderedRemotePointsA[i].index = -1; 2083 } 2084 for (i = 0; i < numLeavesA; i++) { 2085 PetscInt localp = localPointsA ? localPointsA[i] : i; 2086 2087 if (localp >= numRootsB) continue; 2088 reorderedRemotePointsA[localp] = remotePointsA[i]; 2089 } 2090 remotePointsA = reorderedRemotePointsA; 2091 PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf)); 2092 PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB)); 2093 for (i = 0; i < maxleaf - minleaf + 1; i++) { 2094 leafdataB[i].rank = -1; 2095 leafdataB[i].index = -1; 2096 } 2097 PetscCall(PetscSFBcastBegin(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE)); 2098 PetscCall(PetscSFBcastEnd(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE)); 2099 PetscCall(PetscFree(reorderedRemotePointsA)); 2100 2101 denseB = (PetscBool)!localPointsB; 2102 for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) { 2103 if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE; 2104 else numLeavesBA++; 2105 } 2106 if (denseB) { 2107 localPointsBA = NULL; 2108 remotePointsBA = leafdataB; 2109 } else { 2110 PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA)); 2111 PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA)); 2112 for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) { 2113 const PetscInt l = localPointsB ? localPointsB[i] : i; 2114 2115 if (leafdataB[l - minleaf].rank == -1) continue; 2116 remotePointsBA[numLeavesBA] = leafdataB[l - minleaf]; 2117 localPointsBA[numLeavesBA] = l; 2118 numLeavesBA++; 2119 } 2120 PetscCall(PetscFree(leafdataB)); 2121 } 2122 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA)); 2123 PetscCall(PetscSFSetFromOptions(*sfBA)); 2124 PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER)); 2125 PetscFunctionReturn(PETSC_SUCCESS); 2126 } 2127 2128 /*@ 2129 PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one 2130 2131 Input Parameters: 2132 + sfA - The first `PetscSF` 2133 - sfB - The second `PetscSF` 2134 2135 Output Parameter: 2136 . sfBA - The composite `PetscSF`. 2137 2138 Level: developer 2139 2140 Notes: 2141 Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star 2142 forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the 2143 second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected. 2144 2145 `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds 2146 a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected 2147 roots are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` 2148 on `sfA`, then 2149 a `PetscSFReduceBegin()`/`PetscSFReduceEnd()` on `sfB`, on connected roots. 2150 2151 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()` 2152 @*/ 2153 PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA) 2154 { 2155 const PetscSFNode *remotePointsA, *remotePointsB; 2156 PetscSFNode *remotePointsBA; 2157 const PetscInt *localPointsA, *localPointsB; 2158 PetscSFNode *reorderedRemotePointsA = NULL; 2159 PetscInt i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA; 2160 MPI_Op op; 2161 #if defined(PETSC_USE_64BIT_INDICES) 2162 PetscBool iswin; 2163 #endif 2164 2165 PetscFunctionBegin; 2166 PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1); 2167 PetscSFCheckGraphSet(sfA, 1); 2168 PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2); 2169 PetscSFCheckGraphSet(sfB, 2); 2170 PetscCheckSameComm(sfA, 1, sfB, 2); 2171 PetscAssertPointer(sfBA, 3); 2172 PetscCall(PetscSFCheckLeavesUnique_Private(sfA)); 2173 PetscCall(PetscSFCheckLeavesUnique_Private(sfB)); 2174 2175 PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA)); 2176 PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB)); 2177 2178 /* TODO: Check roots of sfB have degree of 1 */ 2179 /* Once we implement it, we can replace the MPI_MAXLOC 2180 with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect. 2181 We use MPI_MAXLOC only to have a deterministic output from this routine if 2182 the root condition is not meet. 2183 */ 2184 op = MPI_MAXLOC; 2185 #if defined(PETSC_USE_64BIT_INDICES) 2186 /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */ 2187 PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin)); 2188 if (iswin) op = MPI_REPLACE; 2189 #endif 2190 2191 PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf)); 2192 PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA)); 2193 for (i = 0; i < maxleaf - minleaf + 1; i++) { 2194 reorderedRemotePointsA[i].rank = -1; 2195 reorderedRemotePointsA[i].index = -1; 2196 } 2197 if (localPointsA) { 2198 for (i = 0; i < numLeavesA; i++) { 2199 if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; 2200 reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i]; 2201 } 2202 } else { 2203 for (i = 0; i < numLeavesA; i++) { 2204 if (i > maxleaf || i < minleaf) continue; 2205 reorderedRemotePointsA[i - minleaf] = remotePointsA[i]; 2206 } 2207 } 2208 2209 PetscCall(PetscMalloc1(numRootsB, &localPointsBA)); 2210 PetscCall(PetscMalloc1(numRootsB, &remotePointsBA)); 2211 for (i = 0; i < numRootsB; i++) { 2212 remotePointsBA[i].rank = -1; 2213 remotePointsBA[i].index = -1; 2214 } 2215 2216 PetscCall(PetscSFReduceBegin(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op)); 2217 PetscCall(PetscSFReduceEnd(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op)); 2218 PetscCall(PetscFree(reorderedRemotePointsA)); 2219 for (i = 0, numLeavesBA = 0; i < numRootsB; i++) { 2220 if (remotePointsBA[i].rank == -1) continue; 2221 remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank; 2222 remotePointsBA[numLeavesBA].index = remotePointsBA[i].index; 2223 localPointsBA[numLeavesBA] = i; 2224 numLeavesBA++; 2225 } 2226 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA)); 2227 PetscCall(PetscSFSetFromOptions(*sfBA)); 2228 PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER)); 2229 PetscFunctionReturn(PETSC_SUCCESS); 2230 } 2231 2232 /* 2233 PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF` 2234 2235 Input Parameter: 2236 . sf - The global `PetscSF` 2237 2238 Output Parameter: 2239 . out - The local `PetscSF` 2240 2241 .seealso: `PetscSF`, `PetscSFCreate()` 2242 */ 2243 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out) 2244 { 2245 MPI_Comm comm; 2246 PetscMPIInt myrank; 2247 const PetscInt *ilocal; 2248 const PetscSFNode *iremote; 2249 PetscInt i, j, nroots, nleaves, lnleaves, *lilocal; 2250 PetscSFNode *liremote; 2251 PetscSF lsf; 2252 2253 PetscFunctionBegin; 2254 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2255 if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out); 2256 else { 2257 /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */ 2258 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 2259 PetscCallMPI(MPI_Comm_rank(comm, &myrank)); 2260 2261 /* Find out local edges and build a local SF */ 2262 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 2263 for (i = lnleaves = 0; i < nleaves; i++) { 2264 if (iremote[i].rank == (PetscInt)myrank) lnleaves++; 2265 } 2266 PetscCall(PetscMalloc1(lnleaves, &lilocal)); 2267 PetscCall(PetscMalloc1(lnleaves, &liremote)); 2268 2269 for (i = j = 0; i < nleaves; i++) { 2270 if (iremote[i].rank == (PetscInt)myrank) { 2271 lilocal[j] = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */ 2272 liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */ 2273 liremote[j].index = iremote[i].index; 2274 j++; 2275 } 2276 } 2277 PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf)); 2278 PetscCall(PetscSFSetFromOptions(lsf)); 2279 PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER)); 2280 PetscCall(PetscSFSetUp(lsf)); 2281 *out = lsf; 2282 } 2283 PetscFunctionReturn(PETSC_SUCCESS); 2284 } 2285 2286 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */ 2287 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata) 2288 { 2289 PetscMemType rootmtype, leafmtype; 2290 2291 PetscFunctionBegin; 2292 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2293 PetscCall(PetscSFSetUp(sf)); 2294 PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 2295 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 2296 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 2297 PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata); 2298 PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 2299 PetscFunctionReturn(PETSC_SUCCESS); 2300 } 2301 2302 /*@ 2303 PetscSFConcatenate - concatenate multiple `PetscSF` into one 2304 2305 Input Parameters: 2306 + comm - the communicator 2307 . nsfs - the number of input `PetscSF` 2308 . sfs - the array of input `PetscSF` 2309 . rootMode - the root mode specifying how roots are handled 2310 - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage 2311 2312 Output Parameter: 2313 . newsf - The resulting `PetscSF` 2314 2315 Level: advanced 2316 2317 Notes: 2318 The communicator of all `PetscSF`s in `sfs` must be comm. 2319 2320 Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order. 2321 2322 The offsets in `leafOffsets` are added to the original leaf indices. 2323 2324 If all input SFs use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well. 2325 In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`. 2326 2327 If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s. 2328 In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs). 2329 2330 All root modes retain the essential connectivity condition. 2331 If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`. 2332 Parameter `rootMode` controls how the input root spaces are combined. 2333 For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode) 2334 and is also the same in the output `PetscSF`. 2335 For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined. 2336 `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally; 2337 roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input `PetscSF` and original local index, and renumbered contiguously. 2338 `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally; 2339 roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously; 2340 the original root ranks are ignored. 2341 For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, 2342 the output `PetscSF`'s root layout is such that the local number of roots is a sum of the input `PetscSF`'s local numbers of roots on each rank 2343 to keep the load balancing. 2344 However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different ranks. 2345 2346 Example: 2347 We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running 2348 .vb 2349 make -C $PETSC_DIR/src/vec/is/sf/tests ex18 2350 for m in {local,global,shared}; do 2351 mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view 2352 done 2353 .ve 2354 we generate two identical `PetscSF`s sf_0 and sf_1, 2355 .vb 2356 PetscSF Object: sf_0 2 MPI processes 2357 type: basic 2358 rank #leaves #roots 2359 [ 0] 4 2 2360 [ 1] 4 2 2361 leaves roots roots in global numbering 2362 ( 0, 0) <- ( 0, 0) = 0 2363 ( 0, 1) <- ( 0, 1) = 1 2364 ( 0, 2) <- ( 1, 0) = 2 2365 ( 0, 3) <- ( 1, 1) = 3 2366 ( 1, 0) <- ( 0, 0) = 0 2367 ( 1, 1) <- ( 0, 1) = 1 2368 ( 1, 2) <- ( 1, 0) = 2 2369 ( 1, 3) <- ( 1, 1) = 3 2370 .ve 2371 and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\: 2372 .vb 2373 rootMode = local: 2374 PetscSF Object: result_sf 2 MPI processes 2375 type: basic 2376 rank #leaves #roots 2377 [ 0] 8 4 2378 [ 1] 8 4 2379 leaves roots roots in global numbering 2380 ( 0, 0) <- ( 0, 0) = 0 2381 ( 0, 1) <- ( 0, 1) = 1 2382 ( 0, 2) <- ( 1, 0) = 4 2383 ( 0, 3) <- ( 1, 1) = 5 2384 ( 0, 4) <- ( 0, 2) = 2 2385 ( 0, 5) <- ( 0, 3) = 3 2386 ( 0, 6) <- ( 1, 2) = 6 2387 ( 0, 7) <- ( 1, 3) = 7 2388 ( 1, 0) <- ( 0, 0) = 0 2389 ( 1, 1) <- ( 0, 1) = 1 2390 ( 1, 2) <- ( 1, 0) = 4 2391 ( 1, 3) <- ( 1, 1) = 5 2392 ( 1, 4) <- ( 0, 2) = 2 2393 ( 1, 5) <- ( 0, 3) = 3 2394 ( 1, 6) <- ( 1, 2) = 6 2395 ( 1, 7) <- ( 1, 3) = 7 2396 2397 rootMode = global: 2398 PetscSF Object: result_sf 2 MPI processes 2399 type: basic 2400 rank #leaves #roots 2401 [ 0] 8 4 2402 [ 1] 8 4 2403 leaves roots roots in global numbering 2404 ( 0, 0) <- ( 0, 0) = 0 2405 ( 0, 1) <- ( 0, 1) = 1 2406 ( 0, 2) <- ( 0, 2) = 2 2407 ( 0, 3) <- ( 0, 3) = 3 2408 ( 0, 4) <- ( 1, 0) = 4 2409 ( 0, 5) <- ( 1, 1) = 5 2410 ( 0, 6) <- ( 1, 2) = 6 2411 ( 0, 7) <- ( 1, 3) = 7 2412 ( 1, 0) <- ( 0, 0) = 0 2413 ( 1, 1) <- ( 0, 1) = 1 2414 ( 1, 2) <- ( 0, 2) = 2 2415 ( 1, 3) <- ( 0, 3) = 3 2416 ( 1, 4) <- ( 1, 0) = 4 2417 ( 1, 5) <- ( 1, 1) = 5 2418 ( 1, 6) <- ( 1, 2) = 6 2419 ( 1, 7) <- ( 1, 3) = 7 2420 2421 rootMode = shared: 2422 PetscSF Object: result_sf 2 MPI processes 2423 type: basic 2424 rank #leaves #roots 2425 [ 0] 8 2 2426 [ 1] 8 2 2427 leaves roots roots in global numbering 2428 ( 0, 0) <- ( 0, 0) = 0 2429 ( 0, 1) <- ( 0, 1) = 1 2430 ( 0, 2) <- ( 1, 0) = 2 2431 ( 0, 3) <- ( 1, 1) = 3 2432 ( 0, 4) <- ( 0, 0) = 0 2433 ( 0, 5) <- ( 0, 1) = 1 2434 ( 0, 6) <- ( 1, 0) = 2 2435 ( 0, 7) <- ( 1, 1) = 3 2436 ( 1, 0) <- ( 0, 0) = 0 2437 ( 1, 1) <- ( 0, 1) = 1 2438 ( 1, 2) <- ( 1, 0) = 2 2439 ( 1, 3) <- ( 1, 1) = 3 2440 ( 1, 4) <- ( 0, 0) = 0 2441 ( 1, 5) <- ( 0, 1) = 1 2442 ( 1, 6) <- ( 1, 0) = 2 2443 ( 1, 7) <- ( 1, 1) = 3 2444 .ve 2445 2446 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode` 2447 @*/ 2448 PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf) 2449 { 2450 PetscInt i, s, nLeaves, nRoots; 2451 PetscInt *leafArrayOffsets; 2452 PetscInt *ilocal_new; 2453 PetscSFNode *iremote_new; 2454 PetscBool all_ilocal_null = PETSC_FALSE; 2455 PetscLayout glayout = NULL; 2456 PetscInt *gremote = NULL; 2457 PetscMPIInt rank, size; 2458 2459 PetscFunctionBegin; 2460 if (PetscDefined(USE_DEBUG)) { 2461 PetscSF dummy; /* just to have a PetscObject on comm for input validation */ 2462 2463 PetscCall(PetscSFCreate(comm, &dummy)); 2464 PetscValidLogicalCollectiveInt(dummy, nsfs, 2); 2465 PetscAssertPointer(sfs, 3); 2466 for (i = 0; i < nsfs; i++) { 2467 PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3); 2468 PetscCheckSameComm(dummy, 1, sfs[i], 3); 2469 } 2470 PetscValidLogicalCollectiveEnum(dummy, rootMode, 4); 2471 if (leafOffsets) PetscAssertPointer(leafOffsets, 5); 2472 PetscAssertPointer(newsf, 6); 2473 PetscCall(PetscSFDestroy(&dummy)); 2474 } 2475 if (!nsfs) { 2476 PetscCall(PetscSFCreate(comm, newsf)); 2477 PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 2478 PetscFunctionReturn(PETSC_SUCCESS); 2479 } 2480 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2481 PetscCallMPI(MPI_Comm_size(comm, &size)); 2482 2483 /* Calculate leaf array offsets */ 2484 PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets)); 2485 leafArrayOffsets[0] = 0; 2486 for (s = 0; s < nsfs; s++) { 2487 PetscInt nl; 2488 2489 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL)); 2490 leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl; 2491 } 2492 nLeaves = leafArrayOffsets[nsfs]; 2493 2494 /* Calculate number of roots */ 2495 switch (rootMode) { 2496 case PETSCSF_CONCATENATE_ROOTMODE_SHARED: { 2497 PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL)); 2498 if (PetscDefined(USE_DEBUG)) { 2499 for (s = 1; s < nsfs; s++) { 2500 PetscInt nr; 2501 2502 PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL)); 2503 PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots); 2504 } 2505 } 2506 } break; 2507 case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: { 2508 /* Calculate also global layout in this case */ 2509 PetscInt *nls; 2510 PetscLayout *lts; 2511 PetscInt **inds; 2512 PetscInt j; 2513 PetscInt rootOffset = 0; 2514 2515 PetscCall(PetscCalloc3(nsfs, <s, nsfs, &nls, nsfs, &inds)); 2516 PetscCall(PetscLayoutCreate(comm, &glayout)); 2517 glayout->bs = 1; 2518 glayout->n = 0; 2519 glayout->N = 0; 2520 for (s = 0; s < nsfs; s++) { 2521 PetscCall(PetscSFGetGraphLayout(sfs[s], <s[s], &nls[s], NULL, &inds[s])); 2522 glayout->n += lts[s]->n; 2523 glayout->N += lts[s]->N; 2524 } 2525 PetscCall(PetscLayoutSetUp(glayout)); 2526 PetscCall(PetscMalloc1(nLeaves, &gremote)); 2527 for (s = 0, j = 0; s < nsfs; s++) { 2528 for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset; 2529 rootOffset += lts[s]->N; 2530 PetscCall(PetscLayoutDestroy(<s[s])); 2531 PetscCall(PetscFree(inds[s])); 2532 } 2533 PetscCall(PetscFree3(lts, nls, inds)); 2534 nRoots = glayout->N; 2535 } break; 2536 case PETSCSF_CONCATENATE_ROOTMODE_LOCAL: 2537 /* nRoots calculated later in this case */ 2538 break; 2539 default: 2540 SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode); 2541 } 2542 2543 if (!leafOffsets) { 2544 all_ilocal_null = PETSC_TRUE; 2545 for (s = 0; s < nsfs; s++) { 2546 const PetscInt *ilocal; 2547 2548 PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL)); 2549 if (ilocal) { 2550 all_ilocal_null = PETSC_FALSE; 2551 break; 2552 } 2553 } 2554 PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL"); 2555 } 2556 2557 /* Renumber and concatenate local leaves */ 2558 ilocal_new = NULL; 2559 if (!all_ilocal_null) { 2560 PetscCall(PetscMalloc1(nLeaves, &ilocal_new)); 2561 for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1; 2562 for (s = 0; s < nsfs; s++) { 2563 const PetscInt *ilocal; 2564 PetscInt *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]); 2565 PetscInt i, nleaves_l; 2566 2567 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL)); 2568 for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s]; 2569 } 2570 } 2571 2572 /* Renumber and concatenate remote roots */ 2573 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) { 2574 PetscInt rootOffset = 0; 2575 2576 PetscCall(PetscMalloc1(nLeaves, &iremote_new)); 2577 for (i = 0; i < nLeaves; i++) { 2578 iremote_new[i].rank = -1; 2579 iremote_new[i].index = -1; 2580 } 2581 for (s = 0; s < nsfs; s++) { 2582 PetscInt i, nl, nr; 2583 PetscSF tmp_sf; 2584 const PetscSFNode *iremote; 2585 PetscSFNode *tmp_rootdata; 2586 PetscSFNode *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]); 2587 2588 PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote)); 2589 PetscCall(PetscSFCreate(comm, &tmp_sf)); 2590 /* create helper SF with contiguous leaves */ 2591 PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 2592 PetscCall(PetscSFSetUp(tmp_sf)); 2593 PetscCall(PetscMalloc1(nr, &tmp_rootdata)); 2594 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) { 2595 for (i = 0; i < nr; i++) { 2596 tmp_rootdata[i].index = i + rootOffset; 2597 tmp_rootdata[i].rank = rank; 2598 } 2599 rootOffset += nr; 2600 } else { 2601 for (i = 0; i < nr; i++) { 2602 tmp_rootdata[i].index = i; 2603 tmp_rootdata[i].rank = rank; 2604 } 2605 } 2606 PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2607 PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2608 PetscCall(PetscSFDestroy(&tmp_sf)); 2609 PetscCall(PetscFree(tmp_rootdata)); 2610 } 2611 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above 2612 2613 /* Build the new SF */ 2614 PetscCall(PetscSFCreate(comm, newsf)); 2615 PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER)); 2616 } else { 2617 /* Build the new SF */ 2618 PetscCall(PetscSFCreate(comm, newsf)); 2619 PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote)); 2620 } 2621 PetscCall(PetscSFSetUp(*newsf)); 2622 PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view")); 2623 PetscCall(PetscLayoutDestroy(&glayout)); 2624 PetscCall(PetscFree(gremote)); 2625 PetscCall(PetscFree(leafArrayOffsets)); 2626 PetscFunctionReturn(PETSC_SUCCESS); 2627 } 2628 2629 /*@ 2630 PetscSFRegisterPersistent - Register root and leaf data as memory regions that will be used for repeated PetscSF communications. 2631 2632 Collective 2633 2634 Input Parameters: 2635 + sf - star forest 2636 . unit - the data type contained within the root and leaf data 2637 . rootdata - root data that will be used for multiple PetscSF communications 2638 - leafdata - leaf data that will be used for multiple PetscSF communications 2639 2640 Level: advanced 2641 2642 Notes: 2643 Implementations of `PetscSF` can make optimizations 2644 for repeated communication using the same memory regions, but these optimizations 2645 can be unsound if `rootdata` or `leafdata` is deallocated and the `PetscSF` is not informed. 2646 The intended pattern is 2647 2648 .vb 2649 PetscMalloc2(nroots, &rootdata, nleaves, &leafdata); 2650 2651 PetscSFRegisterPersistent(sf, unit, rootdata, leafdata); 2652 // repeated use of rootdata and leafdata will now be optimized 2653 2654 PetscSFBcastBegin(sf, unit, rootdata, leafdata, MPI_REPLACE); 2655 PetscSFBcastEnd(sf, unit, rootdata, leafdata, MPI_REPLACE); 2656 // ... 2657 PetscSFReduceBegin(sf, unit, leafdata, rootdata, MPI_SUM); 2658 PetscSFReduceEnd(sf, unit, leafdata, rootdata, MPI_SUM); 2659 // ... (other communications) 2660 2661 // rootdata and leafdata must be deregistered before freeing 2662 // skipping this can lead to undefined behavior including 2663 // deadlocks 2664 PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata); 2665 2666 // it is now safe to free rootdata and leafdata 2667 PetscFree2(rootdata, leafdata); 2668 .ve 2669 2670 If you do not register `rootdata` and `leafdata` it will not cause an error, 2671 but optimizations that reduce the setup time for each communication cannot be 2672 made. Currently, the only implementation of `PetscSF` that benefits from 2673 `PetscSFRegisterPersistent()` is `PETSCSFWINDOW`. For the default 2674 `PETSCSFBASIC` there is no benefit to using `PetscSFRegisterPersistent()`. 2675 2676 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFDeregisterPersistent()` 2677 @*/ 2678 PetscErrorCode PetscSFRegisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata) 2679 { 2680 PetscFunctionBegin; 2681 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2682 PetscTryMethod(sf, "PetscSFRegisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata)); 2683 PetscFunctionReturn(PETSC_SUCCESS); 2684 } 2685 2686 /*@ 2687 PetscSFDeregisterPersistent - Signal that repeated usage of root and leaf data for PetscSF communication has concluded. 2688 2689 Collective 2690 2691 Input Parameters: 2692 + sf - star forest 2693 . unit - the data type contained within the root and leaf data 2694 . rootdata - root data that was previously registered with `PetscSFRegisterPersistent()` 2695 - leafdata - leaf data that was previously registered with `PetscSFRegisterPersistent()` 2696 2697 Level: advanced 2698 2699 Note: 2700 See `PetscSFRegisterPersistent()` for when/how to use this function. 2701 2702 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFRegisterPersistent()` 2703 @*/ 2704 PetscErrorCode PetscSFDeregisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata) 2705 { 2706 PetscFunctionBegin; 2707 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2708 PetscTryMethod(sf, "PetscSFDeregisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata)); 2709 PetscFunctionReturn(PETSC_SUCCESS); 2710 } 2711 2712 PETSC_INTERN PetscErrorCode PetscSFGetDatatypeSize_Internal(MPI_Comm comm, MPI_Datatype unit, MPI_Aint *size) 2713 { 2714 MPI_Aint lb, lb_true, bytes, bytes_true; 2715 2716 PetscFunctionBegin; 2717 PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes)); 2718 PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true)); 2719 PetscCheck(lb == 0 && lb_true == 0, comm, PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature"); 2720 *size = bytes; 2721 PetscFunctionReturn(PETSC_SUCCESS); 2722 } 2723