1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/hashseti.h> 3 #include <petsc/private/viewerimpl.h> 4 #include <petsc/private/hashmapi.h> 5 6 #if defined(PETSC_HAVE_CUDA) 7 #include <cuda_runtime.h> 8 #include <petscdevice_cuda.h> 9 #endif 10 11 #if defined(PETSC_HAVE_HIP) 12 #include <hip/hip_runtime.h> 13 #endif 14 15 #if defined(PETSC_CLANG_STATIC_ANALYZER) 16 extern void PetscSFCheckGraphSet(PetscSF, int); 17 #else 18 #if defined(PETSC_USE_DEBUG) 19 #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME) 20 #else 21 #define PetscSFCheckGraphSet(sf, arg) \ 22 do { \ 23 } while (0) 24 #endif 25 #endif 26 27 const char *const PetscSFDuplicateOptions[] = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL}; 28 const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL}; 29 30 /*@ 31 PetscSFCreate - create a star forest communication context 32 33 Collective 34 35 Input Parameter: 36 . comm - communicator on which the star forest will operate 37 38 Output Parameter: 39 . sf - new star forest context 40 41 Options Database Key: 42 + -sf_type basic - Use MPI persistent Isend/Irecv for communication (Default) 43 . -sf_type window - Use MPI-3 one-sided window for communication 44 . -sf_type neighbor - Use MPI-3 neighborhood collectives for communication 45 - -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor) 46 47 Level: intermediate 48 49 Note: 50 When one knows the communication graph is one of the predefined graph, such as `MPI_Alltoall()`, `MPI_Allgatherv()`, 51 `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special 52 `SF`s are optimized and they have better performance than the general `SF`s. 53 54 .seealso: `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()` 55 @*/ 56 PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf) 57 { 58 PetscSF b; 59 60 PetscFunctionBegin; 61 PetscAssertPointer(sf, 2); 62 PetscCall(PetscSFInitializePackage()); 63 64 PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView)); 65 b->nroots = -1; 66 b->nleaves = -1; 67 b->minleaf = PETSC_INT_MAX; 68 b->maxleaf = PETSC_INT_MIN; 69 b->nranks = -1; 70 b->rankorder = PETSC_TRUE; 71 b->ingroup = MPI_GROUP_NULL; 72 b->outgroup = MPI_GROUP_NULL; 73 b->graphset = PETSC_FALSE; 74 #if defined(PETSC_HAVE_DEVICE) 75 b->use_gpu_aware_mpi = use_gpu_aware_mpi; 76 b->use_stream_aware_mpi = PETSC_FALSE; 77 b->unknown_input_stream = PETSC_FALSE; 78 #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 79 b->backend = PETSCSF_BACKEND_KOKKOS; 80 #elif defined(PETSC_HAVE_CUDA) 81 b->backend = PETSCSF_BACKEND_CUDA; 82 #elif defined(PETSC_HAVE_HIP) 83 b->backend = PETSCSF_BACKEND_HIP; 84 #endif 85 86 #if defined(PETSC_HAVE_NVSHMEM) 87 b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */ 88 b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */ 89 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL)); 90 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL)); 91 #endif 92 #endif 93 b->vscat.from_n = -1; 94 b->vscat.to_n = -1; 95 b->vscat.unit = MPIU_SCALAR; 96 *sf = b; 97 PetscFunctionReturn(PETSC_SUCCESS); 98 } 99 100 /*@ 101 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 102 103 Collective 104 105 Input Parameter: 106 . sf - star forest 107 108 Level: advanced 109 110 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()` 111 @*/ 112 PetscErrorCode PetscSFReset(PetscSF sf) 113 { 114 PetscFunctionBegin; 115 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 116 PetscTryTypeMethod(sf, Reset); 117 PetscCall(PetscSFDestroy(&sf->rankssf)); 118 119 sf->nroots = -1; 120 sf->nleaves = -1; 121 sf->minleaf = PETSC_INT_MAX; 122 sf->maxleaf = PETSC_INT_MIN; 123 sf->mine = NULL; 124 sf->remote = NULL; 125 sf->graphset = PETSC_FALSE; 126 PetscCall(PetscFree(sf->mine_alloc)); 127 PetscCall(PetscFree(sf->remote_alloc)); 128 sf->nranks = -1; 129 PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote)); 130 sf->degreeknown = PETSC_FALSE; 131 PetscCall(PetscFree(sf->degree)); 132 if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup)); 133 if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup)); 134 135 if (sf->multi) sf->multi->multi = NULL; 136 PetscCall(PetscSFDestroy(&sf->multi)); 137 138 PetscCall(PetscLayoutDestroy(&sf->map)); 139 140 #if defined(PETSC_HAVE_DEVICE) 141 for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i])); 142 #endif 143 144 sf->setupcalled = PETSC_FALSE; 145 PetscFunctionReturn(PETSC_SUCCESS); 146 } 147 148 /*@ 149 PetscSFSetType - Set the `PetscSF` communication implementation 150 151 Collective 152 153 Input Parameters: 154 + sf - the `PetscSF` context 155 - type - a known method 156 .vb 157 PETSCSFWINDOW - MPI-2/3 one-sided 158 PETSCSFBASIC - basic implementation using MPI-1 two-sided 159 .ve 160 161 Options Database Key: 162 . -sf_type <type> - Sets the method; for example `basic` or `window` use -help for a list of available methods 163 164 Level: intermediate 165 166 Notes: 167 See `PetscSFType` for possible values 168 169 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()` 170 @*/ 171 PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type) 172 { 173 PetscBool match; 174 PetscErrorCode (*r)(PetscSF); 175 176 PetscFunctionBegin; 177 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 178 PetscAssertPointer(type, 2); 179 180 PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match)); 181 if (match) PetscFunctionReturn(PETSC_SUCCESS); 182 183 PetscCall(PetscFunctionListFind(PetscSFList, type, &r)); 184 PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type); 185 /* Destroy the previous PetscSF implementation context */ 186 PetscTryTypeMethod(sf, Destroy); 187 PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops))); 188 PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type)); 189 PetscCall((*r)(sf)); 190 PetscFunctionReturn(PETSC_SUCCESS); 191 } 192 193 /*@ 194 PetscSFGetType - Get the `PetscSF` communication implementation 195 196 Not Collective 197 198 Input Parameter: 199 . sf - the `PetscSF` context 200 201 Output Parameter: 202 . type - the `PetscSF` type name 203 204 Level: intermediate 205 206 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()` 207 @*/ 208 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 209 { 210 PetscFunctionBegin; 211 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 212 PetscAssertPointer(type, 2); 213 *type = ((PetscObject)sf)->type_name; 214 PetscFunctionReturn(PETSC_SUCCESS); 215 } 216 217 /*@ 218 PetscSFDestroy - destroy a star forest 219 220 Collective 221 222 Input Parameter: 223 . sf - address of star forest 224 225 Level: intermediate 226 227 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()` 228 @*/ 229 PetscErrorCode PetscSFDestroy(PetscSF *sf) 230 { 231 PetscFunctionBegin; 232 if (!*sf) PetscFunctionReturn(PETSC_SUCCESS); 233 PetscValidHeaderSpecific(*sf, PETSCSF_CLASSID, 1); 234 if (--((PetscObject)*sf)->refct > 0) { 235 *sf = NULL; 236 PetscFunctionReturn(PETSC_SUCCESS); 237 } 238 PetscCall(PetscSFReset(*sf)); 239 PetscTryTypeMethod(*sf, Destroy); 240 PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf)); 241 if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit)); 242 #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM) 243 if ((*sf)->use_stream_aware_mpi) { 244 PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream)); 245 PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm)); 246 } 247 #endif 248 PetscCall(PetscHeaderDestroy(sf)); 249 PetscFunctionReturn(PETSC_SUCCESS); 250 } 251 252 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 253 { 254 PetscInt i, nleaves; 255 PetscMPIInt size; 256 const PetscInt *ilocal; 257 const PetscSFNode *iremote; 258 259 PetscFunctionBegin; 260 if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS); 261 PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote)); 262 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 263 for (i = 0; i < nleaves; i++) { 264 const PetscInt rank = iremote[i].rank; 265 const PetscInt remote = iremote[i].index; 266 const PetscInt leaf = ilocal ? ilocal[i] : i; 267 PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size); 268 PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i); 269 PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i); 270 } 271 PetscFunctionReturn(PETSC_SUCCESS); 272 } 273 274 /*@ 275 PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication 276 277 Collective 278 279 Input Parameter: 280 . sf - star forest communication object 281 282 Level: beginner 283 284 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()` 285 @*/ 286 PetscErrorCode PetscSFSetUp(PetscSF sf) 287 { 288 PetscFunctionBegin; 289 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 290 PetscSFCheckGraphSet(sf, 1); 291 if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS); 292 PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0)); 293 PetscCall(PetscSFCheckGraphValid_Private(sf)); 294 if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */ 295 PetscTryTypeMethod(sf, SetUp); 296 #if defined(PETSC_HAVE_CUDA) 297 if (sf->backend == PETSCSF_BACKEND_CUDA) { 298 sf->ops->Malloc = PetscSFMalloc_CUDA; 299 sf->ops->Free = PetscSFFree_CUDA; 300 } 301 #endif 302 #if defined(PETSC_HAVE_HIP) 303 if (sf->backend == PETSCSF_BACKEND_HIP) { 304 sf->ops->Malloc = PetscSFMalloc_HIP; 305 sf->ops->Free = PetscSFFree_HIP; 306 } 307 #endif 308 309 #if defined(PETSC_HAVE_KOKKOS) 310 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 311 sf->ops->Malloc = PetscSFMalloc_Kokkos; 312 sf->ops->Free = PetscSFFree_Kokkos; 313 } 314 #endif 315 PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0)); 316 sf->setupcalled = PETSC_TRUE; 317 PetscFunctionReturn(PETSC_SUCCESS); 318 } 319 320 /*@ 321 PetscSFSetFromOptions - set `PetscSF` options using the options database 322 323 Logically Collective 324 325 Input Parameter: 326 . sf - star forest 327 328 Options Database Keys: 329 + -sf_type - implementation type, see `PetscSFSetType()` 330 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 331 . -sf_use_default_stream - Assume callers of `PetscSF` computed the input root/leafdata with the default CUDA stream. `PetscSF` will also 332 use the default stream to process data. Therefore, no stream synchronization is needed between `PetscSF` and its caller (default: true). 333 If true, this option only works with `-use_gpu_aware_mpi 1`. 334 . -sf_use_stream_aware_mpi - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false). 335 If true, this option only works with `-use_gpu_aware_mpi 1`. 336 337 - -sf_backend <cuda,hip,kokkos> - Select the device backend`PetscSF` uses. Currently `PetscSF` has these backends: cuda - hip and Kokkos. 338 On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices, 339 the only available is kokkos. 340 341 Level: intermediate 342 343 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()` 344 @*/ 345 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 346 { 347 PetscSFType deft; 348 char type[256]; 349 PetscBool flg; 350 351 PetscFunctionBegin; 352 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 353 PetscObjectOptionsBegin((PetscObject)sf); 354 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 355 PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg)); 356 PetscCall(PetscSFSetType(sf, flg ? type : deft)); 357 PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL)); 358 PetscCall(PetscOptionsBool("-sf_monitor", "monitor the MPI communication in sf", NULL, sf->monitor, &sf->monitor, NULL)); 359 #if defined(PETSC_HAVE_DEVICE) 360 { 361 char backendstr[32] = {0}; 362 PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set; 363 /* Change the defaults set in PetscSFCreate() with command line options */ 364 PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL)); 365 PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL)); 366 PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set)); 367 PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda)); 368 PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos)); 369 PetscCall(PetscStrcasecmp("hip", backendstr, &isHip)); 370 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 371 if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 372 else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 373 else if (isHip) sf->backend = PETSCSF_BACKEND_HIP; 374 else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr); 375 #elif defined(PETSC_HAVE_KOKKOS) 376 PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr); 377 #endif 378 379 #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM) 380 if (sf->use_stream_aware_mpi) { 381 MPI_Info info; 382 383 PetscCallMPI(MPI_Info_create(&info)); 384 PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t")); 385 PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream))); 386 PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream)); 387 PetscCallMPI(MPI_Info_free(&info)); 388 PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm)); 389 } 390 #endif 391 } 392 #endif 393 PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject); 394 PetscOptionsEnd(); 395 PetscFunctionReturn(PETSC_SUCCESS); 396 } 397 398 /*@ 399 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 400 401 Logically Collective 402 403 Input Parameters: 404 + sf - star forest 405 - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic) 406 407 Level: advanced 408 409 .seealso: `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()` 410 @*/ 411 PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg) 412 { 413 PetscFunctionBegin; 414 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 415 PetscValidLogicalCollectiveBool(sf, flg, 2); 416 PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 417 sf->rankorder = flg; 418 PetscFunctionReturn(PETSC_SUCCESS); 419 } 420 421 /*@ 422 PetscSFSetGraph - Set a parallel star forest 423 424 Collective 425 426 Input Parameters: 427 + sf - star forest 428 . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 429 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 430 . ilocal - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage (locations must be >= 0, enforced 431 during setup in debug mode) 432 . localmode - copy mode for `ilocal` 433 . iremote - remote locations of root vertices for each leaf on the current process, length is 2 `nleaves' 434 (locations must be >= 0, enforced during setup in debug mode) 435 - remotemode - copy mode for `iremote` 436 437 Level: intermediate 438 439 Notes: 440 Leaf indices in `ilocal` must be unique, otherwise an error occurs. 441 442 Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics. 443 In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`, 444 PETSc might modify the respective array; 445 if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`. 446 Only if `PETSC_COPY_VALUES` is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed). 447 448 Fortran Notes: 449 In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`. 450 451 Developer Notes: 452 We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf. 453 This also allows to compare leaf sets of two `PetscSF`s easily. 454 455 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()` 456 @*/ 457 PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt ilocal[], PetscCopyMode localmode, PetscSFNode iremote[], PetscCopyMode remotemode) 458 { 459 PetscBool unique, contiguous; 460 461 PetscFunctionBegin; 462 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 463 if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4); 464 if (nleaves > 0) PetscAssertPointer(iremote, 6); 465 PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots); 466 PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves); 467 /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast 468 * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */ 469 PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode); 470 PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode); 471 472 if (sf->nroots >= 0) { /* Reset only if graph already set */ 473 PetscCall(PetscSFReset(sf)); 474 } 475 476 PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0)); 477 if (PetscDefined(USE_DEBUG)) { 478 PetscMPIInt size; 479 480 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 481 for (PetscInt i = 0; i < nleaves; i++) { PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values"); } 482 } 483 484 sf->nroots = nroots; 485 sf->nleaves = nleaves; 486 487 if (localmode == PETSC_COPY_VALUES && ilocal) { 488 PetscInt *tlocal = NULL; 489 490 PetscCall(PetscMalloc1(nleaves, &tlocal)); 491 PetscCall(PetscArraycpy(tlocal, ilocal, nleaves)); 492 ilocal = tlocal; 493 } 494 if (remotemode == PETSC_COPY_VALUES) { 495 PetscSFNode *tremote = NULL; 496 497 PetscCall(PetscMalloc1(nleaves, &tremote)); 498 PetscCall(PetscArraycpy(tremote, iremote, nleaves)); 499 iremote = tremote; 500 } 501 502 if (nleaves && ilocal) { 503 PetscSFNode work; 504 505 PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work)); 506 PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique)); 507 unique = PetscNot(unique); 508 PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF"); 509 sf->minleaf = ilocal[0]; 510 sf->maxleaf = ilocal[nleaves - 1]; 511 contiguous = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1); 512 } else { 513 sf->minleaf = 0; 514 sf->maxleaf = nleaves - 1; 515 unique = PETSC_TRUE; 516 contiguous = PETSC_TRUE; 517 } 518 519 if (contiguous) { 520 if (localmode == PETSC_USE_POINTER) { 521 ilocal = NULL; 522 } else { 523 PetscCall(PetscFree(ilocal)); 524 } 525 } 526 sf->mine = ilocal; 527 if (localmode == PETSC_USE_POINTER) { 528 sf->mine_alloc = NULL; 529 } else { 530 sf->mine_alloc = ilocal; 531 } 532 if (PetscDefined(USE_DEBUG)) { 533 PetscMPIInt size; 534 535 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 536 for (PetscInt i = 0; i < nleaves; i++) { PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values"); } 537 } 538 sf->remote = iremote; 539 if (remotemode == PETSC_USE_POINTER) { 540 sf->remote_alloc = NULL; 541 } else { 542 sf->remote_alloc = iremote; 543 } 544 PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0)); 545 sf->graphset = PETSC_TRUE; 546 PetscFunctionReturn(PETSC_SUCCESS); 547 } 548 549 /*@ 550 PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern 551 552 Collective 553 554 Input Parameters: 555 + sf - The `PetscSF` 556 . map - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`) 557 - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL` 558 559 Level: intermediate 560 561 Notes: 562 It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `x` and its `PetscLayout` is `map`. 563 `n` and `N` are the local and global sizes of `x` respectively. 564 565 With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to 566 sequential vectors `y` on all MPI processes. 567 568 With `PETSCSF_PATTERN_GATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to a 569 sequential vector `y` on rank 0. 570 571 In above cases, entries of `x` are roots and entries of `y` are leaves. 572 573 With `PETSCSF_PATTERN_ALLTOALL`, map is insignificant. Suppose NP is size of `sf`'s communicator. The routine 574 creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 575 of rank j. Here 0 <=i,j<NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that it does 576 not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data 577 items with `MPI_Type_contiguous` and use that as the <unit> argument in SF routines. 578 579 In this case, roots and leaves are symmetric. 580 581 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()` 582 @*/ 583 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern) 584 { 585 MPI_Comm comm; 586 PetscInt n, N, res[2]; 587 PetscMPIInt rank, size; 588 PetscSFType type; 589 590 PetscFunctionBegin; 591 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 592 if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2); 593 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 594 PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern); 595 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 596 PetscCallMPI(MPI_Comm_size(comm, &size)); 597 598 if (pattern == PETSCSF_PATTERN_ALLTOALL) { 599 PetscInt sizei = size; 600 601 type = PETSCSFALLTOALL; 602 PetscCall(PetscLayoutCreate(comm, &sf->map)); 603 PetscCall(PetscLayoutSetLocalSize(sf->map, size)); 604 PetscCall(PetscLayoutSetSize(sf->map, PetscSqr(sizei))); 605 PetscCall(PetscLayoutSetUp(sf->map)); 606 } else { 607 PetscCall(PetscLayoutGetLocalSize(map, &n)); 608 PetscCall(PetscLayoutGetSize(map, &N)); 609 res[0] = n; 610 res[1] = -n; 611 /* Check if n are same over all ranks so that we can optimize it */ 612 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm)); 613 if (res[0] == -res[1]) { /* same n */ 614 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 615 } else { 616 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 617 } 618 PetscCall(PetscLayoutReference(map, &sf->map)); 619 } 620 PetscCall(PetscSFSetType(sf, type)); 621 622 sf->pattern = pattern; 623 sf->mine = NULL; /* Contiguous */ 624 625 /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 626 Also set other easy stuff. 627 */ 628 if (pattern == PETSCSF_PATTERN_ALLGATHER) { 629 sf->nleaves = N; 630 sf->nroots = n; 631 sf->nranks = size; 632 sf->minleaf = 0; 633 sf->maxleaf = N - 1; 634 } else if (pattern == PETSCSF_PATTERN_GATHER) { 635 sf->nleaves = rank ? 0 : N; 636 sf->nroots = n; 637 sf->nranks = rank ? 0 : size; 638 sf->minleaf = 0; 639 sf->maxleaf = rank ? -1 : N - 1; 640 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 641 sf->nleaves = size; 642 sf->nroots = size; 643 sf->nranks = size; 644 sf->minleaf = 0; 645 sf->maxleaf = size - 1; 646 } 647 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 648 sf->graphset = PETSC_TRUE; 649 PetscFunctionReturn(PETSC_SUCCESS); 650 } 651 652 /*@ 653 PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map 654 655 Collective 656 657 Input Parameter: 658 . sf - star forest to invert 659 660 Output Parameter: 661 . isf - inverse of `sf` 662 663 Level: advanced 664 665 Notes: 666 All roots must have degree 1. 667 668 The local space may be a permutation, but cannot be sparse. 669 670 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetGraph()` 671 @*/ 672 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf) 673 { 674 PetscMPIInt rank; 675 PetscInt i, nroots, nleaves, maxlocal, count, *newilocal; 676 const PetscInt *ilocal; 677 PetscSFNode *roots, *leaves; 678 679 PetscFunctionBegin; 680 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 681 PetscSFCheckGraphSet(sf, 1); 682 PetscAssertPointer(isf, 2); 683 684 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL)); 685 maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */ 686 687 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 688 PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves)); 689 for (i = 0; i < maxlocal; i++) { 690 leaves[i].rank = rank; 691 leaves[i].index = i; 692 } 693 for (i = 0; i < nroots; i++) { 694 roots[i].rank = -1; 695 roots[i].index = -1; 696 } 697 PetscCall(PetscSFReduceBegin(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE)); 698 PetscCall(PetscSFReduceEnd(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE)); 699 700 /* Check whether our leaves are sparse */ 701 for (i = 0, count = 0; i < nroots; i++) 702 if (roots[i].rank >= 0) count++; 703 if (count == nroots) newilocal = NULL; 704 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal)); 705 for (i = 0, count = 0; i < nroots; i++) { 706 if (roots[i].rank >= 0) { 707 newilocal[count] = i; 708 roots[count].rank = roots[i].rank; 709 roots[count].index = roots[i].index; 710 count++; 711 } 712 } 713 } 714 715 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf)); 716 PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES)); 717 PetscCall(PetscFree2(roots, leaves)); 718 PetscFunctionReturn(PETSC_SUCCESS); 719 } 720 721 /*@ 722 PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph 723 724 Collective 725 726 Input Parameters: 727 + sf - communication object to duplicate 728 - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`) 729 730 Output Parameter: 731 . newsf - new communication object 732 733 Level: beginner 734 735 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()` 736 @*/ 737 PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf) 738 { 739 PetscSFType type; 740 MPI_Datatype dtype = MPIU_SCALAR; 741 742 PetscFunctionBegin; 743 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 744 PetscValidLogicalCollectiveEnum(sf, opt, 2); 745 PetscAssertPointer(newsf, 3); 746 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf)); 747 PetscCall(PetscSFGetType(sf, &type)); 748 if (type) PetscCall(PetscSFSetType(*newsf, type)); 749 (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */ 750 if (opt == PETSCSF_DUPLICATE_GRAPH) { 751 PetscSFCheckGraphSet(sf, 1); 752 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 753 PetscInt nroots, nleaves; 754 const PetscInt *ilocal; 755 const PetscSFNode *iremote; 756 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 757 PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 758 } else { 759 PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern)); 760 } 761 } 762 /* Since oldtype is committed, so is newtype, according to MPI */ 763 if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype)); 764 (*newsf)->vscat.bs = sf->vscat.bs; 765 (*newsf)->vscat.unit = dtype; 766 (*newsf)->vscat.to_n = sf->vscat.to_n; 767 (*newsf)->vscat.from_n = sf->vscat.from_n; 768 /* Do not copy lsf. Build it on demand since it is rarely used */ 769 770 #if defined(PETSC_HAVE_DEVICE) 771 (*newsf)->backend = sf->backend; 772 (*newsf)->unknown_input_stream = sf->unknown_input_stream; 773 (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi; 774 (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi; 775 #endif 776 PetscTryTypeMethod(sf, Duplicate, opt, *newsf); 777 /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */ 778 PetscFunctionReturn(PETSC_SUCCESS); 779 } 780 781 /*@C 782 PetscSFGetGraph - Get the graph specifying a parallel star forest 783 784 Not Collective 785 786 Input Parameter: 787 . sf - star forest 788 789 Output Parameters: 790 + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 791 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 792 . ilocal - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage) 793 - iremote - remote locations of root vertices for each leaf on the current process 794 795 Level: intermediate 796 797 Notes: 798 We are not currently requiring that the graph is set, thus returning `nroots` = -1 if it has not been set yet 799 800 The returned `ilocal` and `iremote` might contain values in different order than the input ones in `PetscSFSetGraph()` 801 802 Fortran Note: 803 Use `PetscSFRestoreGraph()` when access to the arrays is no longer needed 804 805 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()` 806 @*/ 807 PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt *ilocal[], const PetscSFNode *iremote[]) 808 { 809 PetscFunctionBegin; 810 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 811 if (sf->ops->GetGraph) { 812 PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote)); 813 } else { 814 if (nroots) *nroots = sf->nroots; 815 if (nleaves) *nleaves = sf->nleaves; 816 if (ilocal) *ilocal = sf->mine; 817 if (iremote) *iremote = sf->remote; 818 } 819 PetscFunctionReturn(PETSC_SUCCESS); 820 } 821 822 /*@ 823 PetscSFGetLeafRange - Get the active leaf ranges 824 825 Not Collective 826 827 Input Parameter: 828 . sf - star forest 829 830 Output Parameters: 831 + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves. 832 - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves. 833 834 Level: developer 835 836 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 837 @*/ 838 PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf) 839 { 840 PetscFunctionBegin; 841 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 842 PetscSFCheckGraphSet(sf, 1); 843 if (minleaf) *minleaf = sf->minleaf; 844 if (maxleaf) *maxleaf = sf->maxleaf; 845 PetscFunctionReturn(PETSC_SUCCESS); 846 } 847 848 /*@ 849 PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database 850 851 Collective 852 853 Input Parameters: 854 + A - the star forest 855 . obj - Optional object that provides the prefix for the option names 856 - name - command line option 857 858 Level: intermediate 859 860 Note: 861 See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat` 862 863 .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()` 864 @*/ 865 PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[]) 866 { 867 PetscFunctionBegin; 868 PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1); 869 PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name)); 870 PetscFunctionReturn(PETSC_SUCCESS); 871 } 872 873 /*@ 874 PetscSFView - view a star forest 875 876 Collective 877 878 Input Parameters: 879 + sf - star forest 880 - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD` 881 882 Level: beginner 883 884 .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()` 885 @*/ 886 PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer) 887 { 888 PetscBool iascii; 889 PetscViewerFormat format; 890 891 PetscFunctionBegin; 892 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 893 if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer)); 894 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 895 PetscCheckSameComm(sf, 1, viewer, 2); 896 if (sf->graphset) PetscCall(PetscSFSetUp(sf)); 897 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 898 if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) { 899 PetscMPIInt rank; 900 PetscInt j; 901 902 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer)); 903 PetscCall(PetscViewerASCIIPushTab(viewer)); 904 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 905 if (!sf->graphset) { 906 PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n")); 907 PetscCall(PetscViewerASCIIPopTab(viewer)); 908 PetscFunctionReturn(PETSC_SUCCESS); 909 } 910 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 911 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 912 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%d\n", rank, sf->nroots, sf->nleaves, sf->nranks)); 913 for (PetscInt i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index)); 914 PetscCall(PetscViewerFlush(viewer)); 915 PetscCall(PetscViewerGetFormat(viewer, &format)); 916 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 917 PetscMPIInt *tmpranks, *perm; 918 919 PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm)); 920 PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks)); 921 for (PetscMPIInt i = 0; i < sf->nranks; i++) perm[i] = i; 922 PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm)); 923 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank)); 924 for (PetscMPIInt ii = 0; ii < sf->nranks; ii++) { 925 PetscMPIInt i = perm[ii]; 926 927 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i])); 928 for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j])); 929 } 930 PetscCall(PetscFree2(tmpranks, perm)); 931 } 932 PetscCall(PetscViewerFlush(viewer)); 933 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 934 } 935 PetscCall(PetscViewerASCIIPopTab(viewer)); 936 } 937 PetscTryTypeMethod(sf, View, viewer); 938 PetscFunctionReturn(PETSC_SUCCESS); 939 } 940 941 /*@C 942 PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process 943 944 Not Collective 945 946 Input Parameter: 947 . sf - star forest 948 949 Output Parameters: 950 + nranks - number of ranks referenced by local part 951 . ranks - [`nranks`] array of ranks 952 . roffset - [`nranks`+1] offset in `rmine`/`rremote` for each rank 953 . rmine - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote rank, or `NULL` 954 - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote rank, or `NULL` 955 956 Level: developer 957 958 .seealso: `PetscSF`, `PetscSFGetLeafRanks()` 959 @*/ 960 PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscMPIInt *nranks, const PetscMPIInt *ranks[], const PetscInt *roffset[], const PetscInt *rmine[], const PetscInt *rremote[]) 961 { 962 PetscFunctionBegin; 963 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 964 PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 965 if (sf->ops->GetRootRanks) { 966 PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote); 967 } else { 968 /* The generic implementation */ 969 if (nranks) *nranks = sf->nranks; 970 if (ranks) *ranks = sf->ranks; 971 if (roffset) *roffset = sf->roffset; 972 if (rmine) *rmine = sf->rmine; 973 if (rremote) *rremote = sf->rremote; 974 } 975 PetscFunctionReturn(PETSC_SUCCESS); 976 } 977 978 /*@C 979 PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 980 981 Not Collective 982 983 Input Parameter: 984 . sf - star forest 985 986 Output Parameters: 987 + niranks - number of leaf ranks referencing roots on this process 988 . iranks - [`niranks`] array of ranks 989 . ioffset - [`niranks`+1] offset in `irootloc` for each rank 990 - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf rank 991 992 Level: developer 993 994 .seealso: `PetscSF`, `PetscSFGetRootRanks()` 995 @*/ 996 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscMPIInt *niranks, const PetscMPIInt *iranks[], const PetscInt *ioffset[], const PetscInt *irootloc[]) 997 { 998 PetscFunctionBegin; 999 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1000 PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 1001 if (sf->ops->GetLeafRanks) { 1002 PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc); 1003 } else { 1004 PetscSFType type; 1005 PetscCall(PetscSFGetType(sf, &type)); 1006 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 1007 } 1008 PetscFunctionReturn(PETSC_SUCCESS); 1009 } 1010 1011 static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list) 1012 { 1013 PetscInt i; 1014 for (i = 0; i < n; i++) { 1015 if (needle == list[i]) return PETSC_TRUE; 1016 } 1017 return PETSC_FALSE; 1018 } 1019 1020 /*@C 1021 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations. 1022 1023 Collective 1024 1025 Input Parameters: 1026 + sf - `PetscSF` to set up; `PetscSFSetGraph()` must have been called 1027 - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange) 1028 1029 Level: developer 1030 1031 .seealso: `PetscSF`, `PetscSFGetRootRanks()` 1032 @*/ 1033 PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup) 1034 { 1035 PetscHMapI table; 1036 PetscHashIter pos; 1037 PetscMPIInt size, groupsize, *groupranks, *ranks; 1038 PetscInt *rcount; 1039 PetscInt irank, sfnrank, ranksi; 1040 PetscMPIInt i, orank = -1; 1041 1042 PetscFunctionBegin; 1043 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1044 PetscSFCheckGraphSet(sf, 1); 1045 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 1046 PetscCall(PetscHMapICreateWithSize(10, &table)); 1047 for (i = 0; i < sf->nleaves; i++) { 1048 /* Log 1-based rank */ 1049 PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES)); 1050 } 1051 PetscCall(PetscHMapIGetSize(table, &sfnrank)); 1052 PetscCall(PetscMPIIntCast(sfnrank, &sf->nranks)); 1053 PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote)); 1054 PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks)); 1055 PetscHashIterBegin(table, pos); 1056 for (i = 0; i < sf->nranks; i++) { 1057 PetscHashIterGetKey(table, pos, ranksi); 1058 PetscCall(PetscMPIIntCast(ranksi, &ranks[i])); 1059 PetscHashIterGetVal(table, pos, rcount[i]); 1060 PetscHashIterNext(table, pos); 1061 ranks[i]--; /* Convert back to 0-based */ 1062 } 1063 PetscCall(PetscHMapIDestroy(&table)); 1064 1065 /* We expect that dgroup is reliably "small" while nranks could be large */ 1066 { 1067 MPI_Group group = MPI_GROUP_NULL; 1068 PetscMPIInt *dgroupranks; 1069 1070 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 1071 PetscCallMPI(MPI_Group_size(dgroup, &groupsize)); 1072 PetscCall(PetscMalloc1(groupsize, &dgroupranks)); 1073 PetscCall(PetscMalloc1(groupsize, &groupranks)); 1074 for (i = 0; i < groupsize; i++) dgroupranks[i] = i; 1075 if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks)); 1076 PetscCallMPI(MPI_Group_free(&group)); 1077 PetscCall(PetscFree(dgroupranks)); 1078 } 1079 1080 /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */ 1081 for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) { 1082 for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */ 1083 if (InList(ranks[i], groupsize, groupranks)) break; 1084 } 1085 for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */ 1086 if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break; 1087 } 1088 if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */ 1089 PetscMPIInt tmprank; 1090 PetscInt tmpcount; 1091 1092 tmprank = ranks[i]; 1093 tmpcount = rcount[i]; 1094 ranks[i] = ranks[sf->ndranks]; 1095 rcount[i] = rcount[sf->ndranks]; 1096 ranks[sf->ndranks] = tmprank; 1097 rcount[sf->ndranks] = tmpcount; 1098 sf->ndranks++; 1099 } 1100 } 1101 PetscCall(PetscFree(groupranks)); 1102 PetscCall(PetscSortMPIIntWithIntArray(sf->ndranks, ranks, rcount)); 1103 if (rcount) PetscCall(PetscSortMPIIntWithIntArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks)); 1104 sf->roffset[0] = 0; 1105 for (i = 0; i < sf->nranks; i++) { 1106 PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i)); 1107 sf->roffset[i + 1] = sf->roffset[i] + rcount[i]; 1108 rcount[i] = 0; 1109 } 1110 for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) { 1111 /* short circuit */ 1112 if (orank != sf->remote[i].rank) { 1113 /* Search for index of iremote[i].rank in sf->ranks */ 1114 PetscCall(PetscMPIIntCast(sf->remote[i].rank, &orank)); 1115 PetscCall(PetscFindMPIInt(orank, sf->ndranks, sf->ranks, &irank)); 1116 if (irank < 0) { 1117 PetscCall(PetscFindMPIInt(orank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank)); 1118 if (irank >= 0) irank += sf->ndranks; 1119 } 1120 } 1121 PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %d in array", orank); 1122 sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i; 1123 sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index; 1124 rcount[irank]++; 1125 } 1126 PetscCall(PetscFree2(rcount, ranks)); 1127 PetscFunctionReturn(PETSC_SUCCESS); 1128 } 1129 1130 /*@C 1131 PetscSFGetGroups - gets incoming and outgoing process groups 1132 1133 Collective 1134 1135 Input Parameter: 1136 . sf - star forest 1137 1138 Output Parameters: 1139 + incoming - group of origin processes for incoming edges (leaves that reference my roots) 1140 - outgoing - group of destination processes for outgoing edges (roots that I reference) 1141 1142 Level: developer 1143 1144 .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()` 1145 @*/ 1146 PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing) 1147 { 1148 MPI_Group group = MPI_GROUP_NULL; 1149 1150 PetscFunctionBegin; 1151 PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups"); 1152 if (sf->ingroup == MPI_GROUP_NULL) { 1153 PetscInt i; 1154 const PetscInt *indegree; 1155 PetscMPIInt rank, *outranks, *inranks, indegree0; 1156 PetscSFNode *remote; 1157 PetscSF bgcount; 1158 1159 /* Compute the number of incoming ranks */ 1160 PetscCall(PetscMalloc1(sf->nranks, &remote)); 1161 for (i = 0; i < sf->nranks; i++) { 1162 remote[i].rank = sf->ranks[i]; 1163 remote[i].index = 0; 1164 } 1165 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount)); 1166 PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER)); 1167 PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree)); 1168 PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree)); 1169 /* Enumerate the incoming ranks */ 1170 PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks)); 1171 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 1172 for (i = 0; i < sf->nranks; i++) outranks[i] = rank; 1173 PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks)); 1174 PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks)); 1175 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 1176 PetscCall(PetscMPIIntCast(indegree[0], &indegree0)); 1177 PetscCallMPI(MPI_Group_incl(group, indegree0, inranks, &sf->ingroup)); 1178 PetscCallMPI(MPI_Group_free(&group)); 1179 PetscCall(PetscFree2(inranks, outranks)); 1180 PetscCall(PetscSFDestroy(&bgcount)); 1181 } 1182 *incoming = sf->ingroup; 1183 1184 if (sf->outgroup == MPI_GROUP_NULL) { 1185 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 1186 PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup)); 1187 PetscCallMPI(MPI_Group_free(&group)); 1188 } 1189 *outgoing = sf->outgroup; 1190 PetscFunctionReturn(PETSC_SUCCESS); 1191 } 1192 1193 /*@ 1194 PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks 1195 1196 Collective 1197 1198 Input Parameter: 1199 . sf - star forest 1200 1201 Output Parameter: 1202 . rsf - the star forest with a single root per process to perform communications 1203 1204 Level: developer 1205 1206 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()` 1207 @*/ 1208 PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf) 1209 { 1210 PetscFunctionBegin; 1211 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1212 PetscAssertPointer(rsf, 2); 1213 if (!sf->rankssf) { 1214 PetscSFNode *rremotes; 1215 const PetscMPIInt *ranks; 1216 PetscMPIInt nranks; 1217 1218 PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL)); 1219 PetscCall(PetscMalloc1(nranks, &rremotes)); 1220 for (PetscInt i = 0; i < nranks; i++) { 1221 rremotes[i].rank = ranks[i]; 1222 rremotes[i].index = 0; 1223 } 1224 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf)); 1225 PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER)); 1226 } 1227 *rsf = sf->rankssf; 1228 PetscFunctionReturn(PETSC_SUCCESS); 1229 } 1230 1231 /*@ 1232 PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters 1233 1234 Collective 1235 1236 Input Parameter: 1237 . sf - star forest that may contain roots with 0 or with more than 1 vertex 1238 1239 Output Parameter: 1240 . multi - star forest with split roots, such that each root has degree exactly 1 1241 1242 Level: developer 1243 1244 Note: 1245 In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi 1246 directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming 1247 edge, it is a candidate for future optimization that might involve its removal. 1248 1249 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()` 1250 @*/ 1251 PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi) 1252 { 1253 PetscFunctionBegin; 1254 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1255 PetscAssertPointer(multi, 2); 1256 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */ 1257 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi)); 1258 *multi = sf->multi; 1259 sf->multi->multi = sf->multi; 1260 PetscFunctionReturn(PETSC_SUCCESS); 1261 } 1262 if (!sf->multi) { 1263 const PetscInt *indegree; 1264 PetscInt i, *inoffset, *outones, *outoffset, maxlocal; 1265 PetscSFNode *remote; 1266 maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */ 1267 PetscCall(PetscSFComputeDegreeBegin(sf, &indegree)); 1268 PetscCall(PetscSFComputeDegreeEnd(sf, &indegree)); 1269 PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset)); 1270 inoffset[0] = 0; 1271 for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i]; 1272 for (i = 0; i < maxlocal; i++) outones[i] = 1; 1273 PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM)); 1274 PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM)); 1275 for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1276 if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */ 1277 for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp"); 1278 } 1279 PetscCall(PetscMalloc1(sf->nleaves, &remote)); 1280 for (i = 0; i < sf->nleaves; i++) { 1281 remote[i].rank = sf->remote[i].rank; 1282 remote[i].index = outoffset[sf->mine ? sf->mine[i] : i]; 1283 } 1284 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi)); 1285 sf->multi->multi = sf->multi; 1286 PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER)); 1287 if (sf->rankorder) { /* Sort the ranks */ 1288 PetscMPIInt rank; 1289 PetscInt *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree; 1290 PetscSFNode *newremote; 1291 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 1292 for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]); 1293 PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset)); 1294 for (i = 0; i < maxlocal; i++) outranks[i] = rank; 1295 PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE)); 1296 PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE)); 1297 /* Sort the incoming ranks at each vertex, build the inverse map */ 1298 for (i = 0; i < sf->nroots; i++) { 1299 PetscInt j; 1300 for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j; 1301 PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset)); 1302 for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1303 } 1304 PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE)); 1305 PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE)); 1306 PetscCall(PetscMalloc1(sf->nleaves, &newremote)); 1307 for (i = 0; i < sf->nleaves; i++) { 1308 newremote[i].rank = sf->remote[i].rank; 1309 newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i]; 1310 } 1311 PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER)); 1312 PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset)); 1313 } 1314 PetscCall(PetscFree3(inoffset, outones, outoffset)); 1315 } 1316 *multi = sf->multi; 1317 PetscFunctionReturn(PETSC_SUCCESS); 1318 } 1319 1320 /*@C 1321 PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices 1322 1323 Collective 1324 1325 Input Parameters: 1326 + sf - original star forest 1327 . nselected - number of selected roots on this process 1328 - selected - indices of the selected roots on this process 1329 1330 Output Parameter: 1331 . esf - new star forest 1332 1333 Level: advanced 1334 1335 Note: 1336 To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can 1337 be done by calling PetscSFGetGraph(). 1338 1339 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1340 @*/ 1341 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf) 1342 { 1343 PetscInt i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal; 1344 const PetscInt *ilocal; 1345 signed char *rootdata, *leafdata, *leafmem; 1346 const PetscSFNode *iremote; 1347 PetscSFNode *new_iremote; 1348 MPI_Comm comm; 1349 1350 PetscFunctionBegin; 1351 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1352 PetscSFCheckGraphSet(sf, 1); 1353 if (nselected) PetscAssertPointer(selected, 3); 1354 PetscAssertPointer(esf, 4); 1355 1356 PetscCall(PetscSFSetUp(sf)); 1357 PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0)); 1358 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1359 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 1360 1361 if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */ 1362 PetscBool dups; 1363 PetscCall(PetscCheckDupsInt(nselected, selected, &dups)); 1364 PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups"); 1365 for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots); 1366 } 1367 1368 if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf); 1369 else { 1370 /* A generic version of creating embedded sf */ 1371 PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf)); 1372 maxlocal = maxleaf - minleaf + 1; 1373 PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem)); 1374 leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf); 1375 /* Tag selected roots and bcast to leaves */ 1376 for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1; 1377 PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1378 PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1379 1380 /* Build esf with leaves that are still connected */ 1381 esf_nleaves = 0; 1382 for (i = 0; i < nleaves; i++) { 1383 j = ilocal ? ilocal[i] : i; 1384 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1385 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1386 */ 1387 esf_nleaves += (leafdata[j] ? 1 : 0); 1388 } 1389 PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal)); 1390 PetscCall(PetscMalloc1(esf_nleaves, &new_iremote)); 1391 for (i = n = 0; i < nleaves; i++) { 1392 j = ilocal ? ilocal[i] : i; 1393 if (leafdata[j]) { 1394 new_ilocal[n] = j; 1395 new_iremote[n].rank = iremote[i].rank; 1396 new_iremote[n].index = iremote[i].index; 1397 ++n; 1398 } 1399 } 1400 PetscCall(PetscSFCreate(comm, esf)); 1401 PetscCall(PetscSFSetFromOptions(*esf)); 1402 PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1403 PetscCall(PetscFree2(rootdata, leafmem)); 1404 } 1405 PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0)); 1406 PetscFunctionReturn(PETSC_SUCCESS); 1407 } 1408 1409 /*@C 1410 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices 1411 1412 Collective 1413 1414 Input Parameters: 1415 + sf - original star forest 1416 . nselected - number of selected leaves on this process 1417 - selected - indices of the selected leaves on this process 1418 1419 Output Parameter: 1420 . newsf - new star forest 1421 1422 Level: advanced 1423 1424 .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1425 @*/ 1426 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf) 1427 { 1428 const PetscSFNode *iremote; 1429 PetscSFNode *new_iremote; 1430 const PetscInt *ilocal; 1431 PetscInt i, nroots, *leaves, *new_ilocal; 1432 MPI_Comm comm; 1433 1434 PetscFunctionBegin; 1435 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1436 PetscSFCheckGraphSet(sf, 1); 1437 if (nselected) PetscAssertPointer(selected, 3); 1438 PetscAssertPointer(newsf, 4); 1439 1440 /* Uniq selected[] and put results in leaves[] */ 1441 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1442 PetscCall(PetscMalloc1(nselected, &leaves)); 1443 PetscCall(PetscArraycpy(leaves, selected, nselected)); 1444 PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves)); 1445 PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves); 1446 1447 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1448 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf); 1449 else { 1450 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote)); 1451 PetscCall(PetscMalloc1(nselected, &new_ilocal)); 1452 PetscCall(PetscMalloc1(nselected, &new_iremote)); 1453 for (i = 0; i < nselected; ++i) { 1454 const PetscInt l = leaves[i]; 1455 new_ilocal[i] = ilocal ? ilocal[l] : l; 1456 new_iremote[i].rank = iremote[l].rank; 1457 new_iremote[i].index = iremote[l].index; 1458 } 1459 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf)); 1460 PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1461 } 1462 PetscCall(PetscFree(leaves)); 1463 PetscFunctionReturn(PETSC_SUCCESS); 1464 } 1465 1466 /*@C 1467 PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()` 1468 1469 Collective 1470 1471 Input Parameters: 1472 + sf - star forest on which to communicate 1473 . unit - data type associated with each node 1474 . rootdata - buffer to broadcast 1475 - op - operation to use for reduction 1476 1477 Output Parameter: 1478 . leafdata - buffer to be reduced with values from each leaf's respective root 1479 1480 Level: intermediate 1481 1482 Note: 1483 When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1484 are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should 1485 use `PetscSFBcastWithMemTypeBegin()` instead. 1486 1487 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()` 1488 @*/ 1489 PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1490 { 1491 PetscMemType rootmtype, leafmtype; 1492 1493 PetscFunctionBegin; 1494 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1495 PetscCall(PetscSFSetUp(sf)); 1496 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1497 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1498 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1499 PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 1500 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1501 PetscFunctionReturn(PETSC_SUCCESS); 1502 } 1503 1504 /*@C 1505 PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call 1506 to `PetscSFBcastEnd()` 1507 1508 Collective 1509 1510 Input Parameters: 1511 + sf - star forest on which to communicate 1512 . unit - data type associated with each node 1513 . rootmtype - memory type of rootdata 1514 . rootdata - buffer to broadcast 1515 . leafmtype - memory type of leafdata 1516 - op - operation to use for reduction 1517 1518 Output Parameter: 1519 . leafdata - buffer to be reduced with values from each leaf's respective root 1520 1521 Level: intermediate 1522 1523 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()` 1524 @*/ 1525 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) 1526 { 1527 PetscFunctionBegin; 1528 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1529 PetscCall(PetscSFSetUp(sf)); 1530 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1531 PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 1532 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1533 PetscFunctionReturn(PETSC_SUCCESS); 1534 } 1535 1536 /*@C 1537 PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()` 1538 1539 Collective 1540 1541 Input Parameters: 1542 + sf - star forest 1543 . unit - data type 1544 . rootdata - buffer to broadcast 1545 - op - operation to use for reduction 1546 1547 Output Parameter: 1548 . leafdata - buffer to be reduced with values from each leaf's respective root 1549 1550 Level: intermediate 1551 1552 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()` 1553 @*/ 1554 PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1555 { 1556 PetscFunctionBegin; 1557 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1558 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1559 PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op); 1560 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1561 PetscFunctionReturn(PETSC_SUCCESS); 1562 } 1563 1564 /*@C 1565 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()` 1566 1567 Collective 1568 1569 Input Parameters: 1570 + sf - star forest 1571 . unit - data type 1572 . leafdata - values to reduce 1573 - op - reduction operation 1574 1575 Output Parameter: 1576 . rootdata - result of reduction of values from all leaves of each root 1577 1578 Level: intermediate 1579 1580 Note: 1581 When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1582 are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should 1583 use `PetscSFReduceWithMemTypeBegin()` instead. 1584 1585 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()` 1586 @*/ 1587 PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1588 { 1589 PetscMemType rootmtype, leafmtype; 1590 1591 PetscFunctionBegin; 1592 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1593 PetscCall(PetscSFSetUp(sf)); 1594 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1595 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1596 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1597 PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1598 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1599 PetscFunctionReturn(PETSC_SUCCESS); 1600 } 1601 1602 /*@C 1603 PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()` 1604 1605 Collective 1606 1607 Input Parameters: 1608 + sf - star forest 1609 . unit - data type 1610 . leafmtype - memory type of leafdata 1611 . leafdata - values to reduce 1612 . rootmtype - memory type of rootdata 1613 - op - reduction operation 1614 1615 Output Parameter: 1616 . rootdata - result of reduction of values from all leaves of each root 1617 1618 Level: intermediate 1619 1620 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()` 1621 @*/ 1622 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) 1623 { 1624 PetscFunctionBegin; 1625 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1626 PetscCall(PetscSFSetUp(sf)); 1627 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1628 PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1629 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1630 PetscFunctionReturn(PETSC_SUCCESS); 1631 } 1632 1633 /*@C 1634 PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()` 1635 1636 Collective 1637 1638 Input Parameters: 1639 + sf - star forest 1640 . unit - data type 1641 . leafdata - values to reduce 1642 - op - reduction operation 1643 1644 Output Parameter: 1645 . rootdata - result of reduction of values from all leaves of each root 1646 1647 Level: intermediate 1648 1649 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()` 1650 @*/ 1651 PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1652 { 1653 PetscFunctionBegin; 1654 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1655 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1656 PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op); 1657 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1658 PetscFunctionReturn(PETSC_SUCCESS); 1659 } 1660 1661 /*@C 1662 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, 1663 to be completed with `PetscSFFetchAndOpEnd()` 1664 1665 Collective 1666 1667 Input Parameters: 1668 + sf - star forest 1669 . unit - data type 1670 . leafdata - leaf values to use in reduction 1671 - op - operation to use for reduction 1672 1673 Output Parameters: 1674 + rootdata - root values to be updated, input state is seen by first process to perform an update 1675 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1676 1677 Level: advanced 1678 1679 Note: 1680 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1681 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1682 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1683 integers. 1684 1685 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()` 1686 @*/ 1687 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1688 { 1689 PetscMemType rootmtype, leafmtype, leafupdatemtype; 1690 1691 PetscFunctionBegin; 1692 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1693 PetscCall(PetscSFSetUp(sf)); 1694 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1695 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1696 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1697 PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype)); 1698 PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1699 PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 1700 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1701 PetscFunctionReturn(PETSC_SUCCESS); 1702 } 1703 1704 /*@C 1705 PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by 1706 applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()` 1707 1708 Collective 1709 1710 Input Parameters: 1711 + sf - star forest 1712 . unit - data type 1713 . rootmtype - memory type of rootdata 1714 . leafmtype - memory type of leafdata 1715 . leafdata - leaf values to use in reduction 1716 . leafupdatemtype - memory type of leafupdate 1717 - op - operation to use for reduction 1718 1719 Output Parameters: 1720 + rootdata - root values to be updated, input state is seen by first process to perform an update 1721 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1722 1723 Level: advanced 1724 1725 Note: 1726 See `PetscSFFetchAndOpBegin()` for more details. 1727 1728 .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()` 1729 @*/ 1730 PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op) 1731 { 1732 PetscFunctionBegin; 1733 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1734 PetscCall(PetscSFSetUp(sf)); 1735 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1736 PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1737 PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 1738 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1739 PetscFunctionReturn(PETSC_SUCCESS); 1740 } 1741 1742 /*@C 1743 PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()` 1744 to fetch values from roots and update atomically by applying operation using my leaf value 1745 1746 Collective 1747 1748 Input Parameters: 1749 + sf - star forest 1750 . unit - data type 1751 . leafdata - leaf values to use in reduction 1752 - op - operation to use for reduction 1753 1754 Output Parameters: 1755 + rootdata - root values to be updated, input state is seen by first process to perform an update 1756 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1757 1758 Level: advanced 1759 1760 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()` 1761 @*/ 1762 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1763 { 1764 PetscFunctionBegin; 1765 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1766 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0)); 1767 PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op); 1768 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0)); 1769 PetscFunctionReturn(PETSC_SUCCESS); 1770 } 1771 1772 /*@C 1773 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()` 1774 1775 Collective 1776 1777 Input Parameter: 1778 . sf - star forest 1779 1780 Output Parameter: 1781 . degree - degree of each root vertex 1782 1783 Level: advanced 1784 1785 Note: 1786 The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it. 1787 1788 .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()` 1789 @*/ 1790 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt *degree[]) 1791 { 1792 PetscFunctionBegin; 1793 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1794 PetscSFCheckGraphSet(sf, 1); 1795 PetscAssertPointer(degree, 2); 1796 if (!sf->degreeknown) { 1797 PetscInt i, nroots = sf->nroots, maxlocal; 1798 PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1799 maxlocal = sf->maxleaf - sf->minleaf + 1; 1800 PetscCall(PetscMalloc1(nroots, &sf->degree)); 1801 PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1802 for (i = 0; i < nroots; i++) sf->degree[i] = 0; 1803 for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1; 1804 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 1805 } 1806 *degree = NULL; 1807 PetscFunctionReturn(PETSC_SUCCESS); 1808 } 1809 1810 /*@C 1811 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()` 1812 1813 Collective 1814 1815 Input Parameter: 1816 . sf - star forest 1817 1818 Output Parameter: 1819 . degree - degree of each root vertex 1820 1821 Level: developer 1822 1823 Note: 1824 The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it. 1825 1826 .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()` 1827 @*/ 1828 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt *degree[]) 1829 { 1830 PetscFunctionBegin; 1831 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1832 PetscSFCheckGraphSet(sf, 1); 1833 PetscAssertPointer(degree, 2); 1834 if (!sf->degreeknown) { 1835 PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 1836 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 1837 PetscCall(PetscFree(sf->degreetmp)); 1838 sf->degreeknown = PETSC_TRUE; 1839 } 1840 *degree = sf->degree; 1841 PetscFunctionReturn(PETSC_SUCCESS); 1842 } 1843 1844 /*@C 1845 PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`). 1846 Each multi-root is assigned index of the corresponding original root. 1847 1848 Collective 1849 1850 Input Parameters: 1851 + sf - star forest 1852 - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()` 1853 1854 Output Parameters: 1855 + nMultiRoots - (optional) number of multi-roots (roots of multi-`PetscSF`) 1856 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots` 1857 1858 Level: developer 1859 1860 Note: 1861 The returned array `multiRootsOrigNumbering` is newly allocated and should be destroyed with `PetscFree()` when no longer needed. 1862 1863 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()` 1864 @*/ 1865 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1866 { 1867 PetscSF msf; 1868 PetscInt k = 0, nroots, nmroots; 1869 1870 PetscFunctionBegin; 1871 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1872 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL)); 1873 if (nroots) PetscAssertPointer(degree, 2); 1874 if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3); 1875 PetscAssertPointer(multiRootsOrigNumbering, 4); 1876 PetscCall(PetscSFGetMultiSF(sf, &msf)); 1877 PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL)); 1878 PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering)); 1879 for (PetscInt i = 0; i < nroots; i++) { 1880 if (!degree[i]) continue; 1881 for (PetscInt j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i; 1882 } 1883 PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail"); 1884 if (nMultiRoots) *nMultiRoots = nmroots; 1885 PetscFunctionReturn(PETSC_SUCCESS); 1886 } 1887 1888 /*@C 1889 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()` 1890 1891 Collective 1892 1893 Input Parameters: 1894 + sf - star forest 1895 . unit - data type 1896 - leafdata - leaf data to gather to roots 1897 1898 Output Parameter: 1899 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1900 1901 Level: intermediate 1902 1903 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()` 1904 @*/ 1905 PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) 1906 { 1907 PetscSF multi = NULL; 1908 1909 PetscFunctionBegin; 1910 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1911 PetscCall(PetscSFSetUp(sf)); 1912 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1913 PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 1914 PetscFunctionReturn(PETSC_SUCCESS); 1915 } 1916 1917 /*@C 1918 PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()` 1919 1920 Collective 1921 1922 Input Parameters: 1923 + sf - star forest 1924 . unit - data type 1925 - leafdata - leaf data to gather to roots 1926 1927 Output Parameter: 1928 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1929 1930 Level: intermediate 1931 1932 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()` 1933 @*/ 1934 PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) 1935 { 1936 PetscSF multi = NULL; 1937 1938 PetscFunctionBegin; 1939 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1940 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1941 PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 1942 PetscFunctionReturn(PETSC_SUCCESS); 1943 } 1944 1945 /*@C 1946 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()` 1947 1948 Collective 1949 1950 Input Parameters: 1951 + sf - star forest 1952 . unit - data type 1953 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1954 1955 Output Parameter: 1956 . leafdata - leaf data to be update with personal data from each respective root 1957 1958 Level: intermediate 1959 1960 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()` 1961 @*/ 1962 PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) 1963 { 1964 PetscSF multi = NULL; 1965 1966 PetscFunctionBegin; 1967 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1968 PetscCall(PetscSFSetUp(sf)); 1969 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1970 PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE)); 1971 PetscFunctionReturn(PETSC_SUCCESS); 1972 } 1973 1974 /*@C 1975 PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()` 1976 1977 Collective 1978 1979 Input Parameters: 1980 + sf - star forest 1981 . unit - data type 1982 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1983 1984 Output Parameter: 1985 . leafdata - leaf data to be update with personal data from each respective root 1986 1987 Level: intermediate 1988 1989 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()` 1990 @*/ 1991 PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) 1992 { 1993 PetscSF multi = NULL; 1994 1995 PetscFunctionBegin; 1996 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1997 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1998 PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE)); 1999 PetscFunctionReturn(PETSC_SUCCESS); 2000 } 2001 2002 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 2003 { 2004 PetscInt i, n, nleaves; 2005 const PetscInt *ilocal = NULL; 2006 PetscHSetI seen; 2007 2008 PetscFunctionBegin; 2009 if (PetscDefined(USE_DEBUG)) { 2010 PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL)); 2011 PetscCall(PetscHSetICreate(&seen)); 2012 for (i = 0; i < nleaves; i++) { 2013 const PetscInt leaf = ilocal ? ilocal[i] : i; 2014 PetscCall(PetscHSetIAdd(seen, leaf)); 2015 } 2016 PetscCall(PetscHSetIGetSize(seen, &n)); 2017 PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique"); 2018 PetscCall(PetscHSetIDestroy(&seen)); 2019 } 2020 PetscFunctionReturn(PETSC_SUCCESS); 2021 } 2022 2023 /*@ 2024 PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view 2025 2026 Input Parameters: 2027 + sfA - The first `PetscSF` 2028 - sfB - The second `PetscSF` 2029 2030 Output Parameter: 2031 . sfBA - The composite `PetscSF` 2032 2033 Level: developer 2034 2035 Notes: 2036 Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star 2037 forests, i.e. the same leaf is not connected with different roots. 2038 2039 `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds 2040 a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected 2041 nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a 2042 `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfB`, on connected nodes. 2043 2044 .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()` 2045 @*/ 2046 PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA) 2047 { 2048 const PetscSFNode *remotePointsA, *remotePointsB; 2049 PetscSFNode *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB; 2050 const PetscInt *localPointsA, *localPointsB; 2051 PetscInt *localPointsBA; 2052 PetscInt i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA; 2053 PetscBool denseB; 2054 2055 PetscFunctionBegin; 2056 PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1); 2057 PetscSFCheckGraphSet(sfA, 1); 2058 PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2); 2059 PetscSFCheckGraphSet(sfB, 2); 2060 PetscCheckSameComm(sfA, 1, sfB, 2); 2061 PetscAssertPointer(sfBA, 3); 2062 PetscCall(PetscSFCheckLeavesUnique_Private(sfA)); 2063 PetscCall(PetscSFCheckLeavesUnique_Private(sfB)); 2064 2065 PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA)); 2066 PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB)); 2067 /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size 2068 numRootsB; otherwise, garbage will be broadcasted. 2069 Example (comm size = 1): 2070 sfA: 0 <- (0, 0) 2071 sfB: 100 <- (0, 0) 2072 101 <- (0, 1) 2073 Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid tartget 2074 of sfB, it has to be recasted as [(0, 0), (-1, -1)] so that points 100 and 101 would 2075 receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on 2076 remotePointsA; if not recasted, point 101 would receive a garbage value. */ 2077 PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA)); 2078 for (i = 0; i < numRootsB; i++) { 2079 reorderedRemotePointsA[i].rank = -1; 2080 reorderedRemotePointsA[i].index = -1; 2081 } 2082 for (i = 0; i < numLeavesA; i++) { 2083 PetscInt localp = localPointsA ? localPointsA[i] : i; 2084 2085 if (localp >= numRootsB) continue; 2086 reorderedRemotePointsA[localp] = remotePointsA[i]; 2087 } 2088 remotePointsA = reorderedRemotePointsA; 2089 PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf)); 2090 PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB)); 2091 for (i = 0; i < maxleaf - minleaf + 1; i++) { 2092 leafdataB[i].rank = -1; 2093 leafdataB[i].index = -1; 2094 } 2095 PetscCall(PetscSFBcastBegin(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE)); 2096 PetscCall(PetscSFBcastEnd(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE)); 2097 PetscCall(PetscFree(reorderedRemotePointsA)); 2098 2099 denseB = (PetscBool)!localPointsB; 2100 for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) { 2101 if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE; 2102 else numLeavesBA++; 2103 } 2104 if (denseB) { 2105 localPointsBA = NULL; 2106 remotePointsBA = leafdataB; 2107 } else { 2108 PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA)); 2109 PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA)); 2110 for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) { 2111 const PetscInt l = localPointsB ? localPointsB[i] : i; 2112 2113 if (leafdataB[l - minleaf].rank == -1) continue; 2114 remotePointsBA[numLeavesBA] = leafdataB[l - minleaf]; 2115 localPointsBA[numLeavesBA] = l; 2116 numLeavesBA++; 2117 } 2118 PetscCall(PetscFree(leafdataB)); 2119 } 2120 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA)); 2121 PetscCall(PetscSFSetFromOptions(*sfBA)); 2122 PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER)); 2123 PetscFunctionReturn(PETSC_SUCCESS); 2124 } 2125 2126 /*@ 2127 PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one 2128 2129 Input Parameters: 2130 + sfA - The first `PetscSF` 2131 - sfB - The second `PetscSF` 2132 2133 Output Parameter: 2134 . sfBA - The composite `PetscSF`. 2135 2136 Level: developer 2137 2138 Notes: 2139 Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star 2140 forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the 2141 second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected. 2142 2143 `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds 2144 a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected 2145 roots are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` 2146 on `sfA`, then 2147 a `PetscSFReduceBegin()`/`PetscSFReduceEnd()` on `sfB`, on connected roots. 2148 2149 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()` 2150 @*/ 2151 PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA) 2152 { 2153 const PetscSFNode *remotePointsA, *remotePointsB; 2154 PetscSFNode *remotePointsBA; 2155 const PetscInt *localPointsA, *localPointsB; 2156 PetscSFNode *reorderedRemotePointsA = NULL; 2157 PetscInt i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA; 2158 MPI_Op op; 2159 #if defined(PETSC_USE_64BIT_INDICES) 2160 PetscBool iswin; 2161 #endif 2162 2163 PetscFunctionBegin; 2164 PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1); 2165 PetscSFCheckGraphSet(sfA, 1); 2166 PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2); 2167 PetscSFCheckGraphSet(sfB, 2); 2168 PetscCheckSameComm(sfA, 1, sfB, 2); 2169 PetscAssertPointer(sfBA, 3); 2170 PetscCall(PetscSFCheckLeavesUnique_Private(sfA)); 2171 PetscCall(PetscSFCheckLeavesUnique_Private(sfB)); 2172 2173 PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA)); 2174 PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB)); 2175 2176 /* TODO: Check roots of sfB have degree of 1 */ 2177 /* Once we implement it, we can replace the MPI_MAXLOC 2178 with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect. 2179 We use MPI_MAXLOC only to have a deterministic output from this routine if 2180 the root condition is not meet. 2181 */ 2182 op = MPI_MAXLOC; 2183 #if defined(PETSC_USE_64BIT_INDICES) 2184 /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */ 2185 PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin)); 2186 if (iswin) op = MPI_REPLACE; 2187 #endif 2188 2189 PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf)); 2190 PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA)); 2191 for (i = 0; i < maxleaf - minleaf + 1; i++) { 2192 reorderedRemotePointsA[i].rank = -1; 2193 reorderedRemotePointsA[i].index = -1; 2194 } 2195 if (localPointsA) { 2196 for (i = 0; i < numLeavesA; i++) { 2197 if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; 2198 reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i]; 2199 } 2200 } else { 2201 for (i = 0; i < numLeavesA; i++) { 2202 if (i > maxleaf || i < minleaf) continue; 2203 reorderedRemotePointsA[i - minleaf] = remotePointsA[i]; 2204 } 2205 } 2206 2207 PetscCall(PetscMalloc1(numRootsB, &localPointsBA)); 2208 PetscCall(PetscMalloc1(numRootsB, &remotePointsBA)); 2209 for (i = 0; i < numRootsB; i++) { 2210 remotePointsBA[i].rank = -1; 2211 remotePointsBA[i].index = -1; 2212 } 2213 2214 PetscCall(PetscSFReduceBegin(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op)); 2215 PetscCall(PetscSFReduceEnd(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op)); 2216 PetscCall(PetscFree(reorderedRemotePointsA)); 2217 for (i = 0, numLeavesBA = 0; i < numRootsB; i++) { 2218 if (remotePointsBA[i].rank == -1) continue; 2219 remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank; 2220 remotePointsBA[numLeavesBA].index = remotePointsBA[i].index; 2221 localPointsBA[numLeavesBA] = i; 2222 numLeavesBA++; 2223 } 2224 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA)); 2225 PetscCall(PetscSFSetFromOptions(*sfBA)); 2226 PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER)); 2227 PetscFunctionReturn(PETSC_SUCCESS); 2228 } 2229 2230 /* 2231 PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF` 2232 2233 Input Parameter: 2234 . sf - The global `PetscSF` 2235 2236 Output Parameter: 2237 . out - The local `PetscSF` 2238 2239 .seealso: `PetscSF`, `PetscSFCreate()` 2240 */ 2241 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out) 2242 { 2243 MPI_Comm comm; 2244 PetscMPIInt myrank; 2245 const PetscInt *ilocal; 2246 const PetscSFNode *iremote; 2247 PetscInt i, j, nroots, nleaves, lnleaves, *lilocal; 2248 PetscSFNode *liremote; 2249 PetscSF lsf; 2250 2251 PetscFunctionBegin; 2252 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2253 if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out); 2254 else { 2255 PetscMPIInt irank; 2256 2257 /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */ 2258 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 2259 PetscCallMPI(MPI_Comm_rank(comm, &myrank)); 2260 2261 /* Find out local edges and build a local SF */ 2262 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 2263 for (i = lnleaves = 0; i < nleaves; i++) { 2264 PetscCall(PetscMPIIntCast(iremote[i].rank, &irank)); 2265 if (irank == myrank) lnleaves++; 2266 } 2267 PetscCall(PetscMalloc1(lnleaves, &lilocal)); 2268 PetscCall(PetscMalloc1(lnleaves, &liremote)); 2269 2270 for (i = j = 0; i < nleaves; i++) { 2271 PetscCall(PetscMPIIntCast(iremote[i].rank, &irank)); 2272 if (irank == myrank) { 2273 lilocal[j] = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */ 2274 liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */ 2275 liremote[j].index = iremote[i].index; 2276 j++; 2277 } 2278 } 2279 PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf)); 2280 PetscCall(PetscSFSetFromOptions(lsf)); 2281 PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER)); 2282 PetscCall(PetscSFSetUp(lsf)); 2283 *out = lsf; 2284 } 2285 PetscFunctionReturn(PETSC_SUCCESS); 2286 } 2287 2288 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */ 2289 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata) 2290 { 2291 PetscMemType rootmtype, leafmtype; 2292 2293 PetscFunctionBegin; 2294 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2295 PetscCall(PetscSFSetUp(sf)); 2296 PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 2297 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 2298 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 2299 PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata); 2300 PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 2301 PetscFunctionReturn(PETSC_SUCCESS); 2302 } 2303 2304 /*@ 2305 PetscSFConcatenate - concatenate multiple `PetscSF` into one 2306 2307 Input Parameters: 2308 + comm - the communicator 2309 . nsfs - the number of input `PetscSF` 2310 . sfs - the array of input `PetscSF` 2311 . rootMode - the root mode specifying how roots are handled 2312 - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage 2313 2314 Output Parameter: 2315 . newsf - The resulting `PetscSF` 2316 2317 Level: advanced 2318 2319 Notes: 2320 The communicator of all `PetscSF`s in `sfs` must be comm. 2321 2322 Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order. 2323 2324 The offsets in `leafOffsets` are added to the original leaf indices. 2325 2326 If all input SFs use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well. 2327 In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`. 2328 2329 If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s. 2330 In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs). 2331 2332 All root modes retain the essential connectivity condition. 2333 If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`. 2334 Parameter `rootMode` controls how the input root spaces are combined. 2335 For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode) 2336 and is also the same in the output `PetscSF`. 2337 For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined. 2338 `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally; 2339 roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input `PetscSF` and original local index, and renumbered contiguously. 2340 `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally; 2341 roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously; 2342 the original root ranks are ignored. 2343 For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, 2344 the output `PetscSF`'s root layout is such that the local number of roots is a sum of the input `PetscSF`'s local numbers of roots on each rank 2345 to keep the load balancing. 2346 However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different ranks. 2347 2348 Example: 2349 We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running 2350 .vb 2351 make -C $PETSC_DIR/src/vec/is/sf/tests ex18 2352 for m in {local,global,shared}; do 2353 mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view 2354 done 2355 .ve 2356 we generate two identical `PetscSF`s sf_0 and sf_1, 2357 .vb 2358 PetscSF Object: sf_0 2 MPI processes 2359 type: basic 2360 rank #leaves #roots 2361 [ 0] 4 2 2362 [ 1] 4 2 2363 leaves roots roots in global numbering 2364 ( 0, 0) <- ( 0, 0) = 0 2365 ( 0, 1) <- ( 0, 1) = 1 2366 ( 0, 2) <- ( 1, 0) = 2 2367 ( 0, 3) <- ( 1, 1) = 3 2368 ( 1, 0) <- ( 0, 0) = 0 2369 ( 1, 1) <- ( 0, 1) = 1 2370 ( 1, 2) <- ( 1, 0) = 2 2371 ( 1, 3) <- ( 1, 1) = 3 2372 .ve 2373 and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\: 2374 .vb 2375 rootMode = local: 2376 PetscSF Object: result_sf 2 MPI processes 2377 type: basic 2378 rank #leaves #roots 2379 [ 0] 8 4 2380 [ 1] 8 4 2381 leaves roots roots in global numbering 2382 ( 0, 0) <- ( 0, 0) = 0 2383 ( 0, 1) <- ( 0, 1) = 1 2384 ( 0, 2) <- ( 1, 0) = 4 2385 ( 0, 3) <- ( 1, 1) = 5 2386 ( 0, 4) <- ( 0, 2) = 2 2387 ( 0, 5) <- ( 0, 3) = 3 2388 ( 0, 6) <- ( 1, 2) = 6 2389 ( 0, 7) <- ( 1, 3) = 7 2390 ( 1, 0) <- ( 0, 0) = 0 2391 ( 1, 1) <- ( 0, 1) = 1 2392 ( 1, 2) <- ( 1, 0) = 4 2393 ( 1, 3) <- ( 1, 1) = 5 2394 ( 1, 4) <- ( 0, 2) = 2 2395 ( 1, 5) <- ( 0, 3) = 3 2396 ( 1, 6) <- ( 1, 2) = 6 2397 ( 1, 7) <- ( 1, 3) = 7 2398 2399 rootMode = global: 2400 PetscSF Object: result_sf 2 MPI processes 2401 type: basic 2402 rank #leaves #roots 2403 [ 0] 8 4 2404 [ 1] 8 4 2405 leaves roots roots in global numbering 2406 ( 0, 0) <- ( 0, 0) = 0 2407 ( 0, 1) <- ( 0, 1) = 1 2408 ( 0, 2) <- ( 0, 2) = 2 2409 ( 0, 3) <- ( 0, 3) = 3 2410 ( 0, 4) <- ( 1, 0) = 4 2411 ( 0, 5) <- ( 1, 1) = 5 2412 ( 0, 6) <- ( 1, 2) = 6 2413 ( 0, 7) <- ( 1, 3) = 7 2414 ( 1, 0) <- ( 0, 0) = 0 2415 ( 1, 1) <- ( 0, 1) = 1 2416 ( 1, 2) <- ( 0, 2) = 2 2417 ( 1, 3) <- ( 0, 3) = 3 2418 ( 1, 4) <- ( 1, 0) = 4 2419 ( 1, 5) <- ( 1, 1) = 5 2420 ( 1, 6) <- ( 1, 2) = 6 2421 ( 1, 7) <- ( 1, 3) = 7 2422 2423 rootMode = shared: 2424 PetscSF Object: result_sf 2 MPI processes 2425 type: basic 2426 rank #leaves #roots 2427 [ 0] 8 2 2428 [ 1] 8 2 2429 leaves roots roots in global numbering 2430 ( 0, 0) <- ( 0, 0) = 0 2431 ( 0, 1) <- ( 0, 1) = 1 2432 ( 0, 2) <- ( 1, 0) = 2 2433 ( 0, 3) <- ( 1, 1) = 3 2434 ( 0, 4) <- ( 0, 0) = 0 2435 ( 0, 5) <- ( 0, 1) = 1 2436 ( 0, 6) <- ( 1, 0) = 2 2437 ( 0, 7) <- ( 1, 1) = 3 2438 ( 1, 0) <- ( 0, 0) = 0 2439 ( 1, 1) <- ( 0, 1) = 1 2440 ( 1, 2) <- ( 1, 0) = 2 2441 ( 1, 3) <- ( 1, 1) = 3 2442 ( 1, 4) <- ( 0, 0) = 0 2443 ( 1, 5) <- ( 0, 1) = 1 2444 ( 1, 6) <- ( 1, 0) = 2 2445 ( 1, 7) <- ( 1, 1) = 3 2446 .ve 2447 2448 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode` 2449 @*/ 2450 PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf) 2451 { 2452 PetscInt i, s, nLeaves, nRoots; 2453 PetscInt *leafArrayOffsets; 2454 PetscInt *ilocal_new; 2455 PetscSFNode *iremote_new; 2456 PetscBool all_ilocal_null = PETSC_FALSE; 2457 PetscLayout glayout = NULL; 2458 PetscInt *gremote = NULL; 2459 PetscMPIInt rank, size; 2460 2461 PetscFunctionBegin; 2462 if (PetscDefined(USE_DEBUG)) { 2463 PetscSF dummy; /* just to have a PetscObject on comm for input validation */ 2464 2465 PetscCall(PetscSFCreate(comm, &dummy)); 2466 PetscValidLogicalCollectiveInt(dummy, nsfs, 2); 2467 PetscAssertPointer(sfs, 3); 2468 for (i = 0; i < nsfs; i++) { 2469 PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3); 2470 PetscCheckSameComm(dummy, 1, sfs[i], 3); 2471 } 2472 PetscValidLogicalCollectiveEnum(dummy, rootMode, 4); 2473 if (leafOffsets) PetscAssertPointer(leafOffsets, 5); 2474 PetscAssertPointer(newsf, 6); 2475 PetscCall(PetscSFDestroy(&dummy)); 2476 } 2477 if (!nsfs) { 2478 PetscCall(PetscSFCreate(comm, newsf)); 2479 PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 2480 PetscFunctionReturn(PETSC_SUCCESS); 2481 } 2482 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2483 PetscCallMPI(MPI_Comm_size(comm, &size)); 2484 2485 /* Calculate leaf array offsets */ 2486 PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets)); 2487 leafArrayOffsets[0] = 0; 2488 for (s = 0; s < nsfs; s++) { 2489 PetscInt nl; 2490 2491 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL)); 2492 leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl; 2493 } 2494 nLeaves = leafArrayOffsets[nsfs]; 2495 2496 /* Calculate number of roots */ 2497 switch (rootMode) { 2498 case PETSCSF_CONCATENATE_ROOTMODE_SHARED: { 2499 PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL)); 2500 if (PetscDefined(USE_DEBUG)) { 2501 for (s = 1; s < nsfs; s++) { 2502 PetscInt nr; 2503 2504 PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL)); 2505 PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots); 2506 } 2507 } 2508 } break; 2509 case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: { 2510 /* Calculate also global layout in this case */ 2511 PetscInt *nls; 2512 PetscLayout *lts; 2513 PetscInt **inds; 2514 PetscInt j; 2515 PetscInt rootOffset = 0; 2516 2517 PetscCall(PetscCalloc3(nsfs, <s, nsfs, &nls, nsfs, &inds)); 2518 PetscCall(PetscLayoutCreate(comm, &glayout)); 2519 glayout->bs = 1; 2520 glayout->n = 0; 2521 glayout->N = 0; 2522 for (s = 0; s < nsfs; s++) { 2523 PetscCall(PetscSFGetGraphLayout(sfs[s], <s[s], &nls[s], NULL, &inds[s])); 2524 glayout->n += lts[s]->n; 2525 glayout->N += lts[s]->N; 2526 } 2527 PetscCall(PetscLayoutSetUp(glayout)); 2528 PetscCall(PetscMalloc1(nLeaves, &gremote)); 2529 for (s = 0, j = 0; s < nsfs; s++) { 2530 for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset; 2531 rootOffset += lts[s]->N; 2532 PetscCall(PetscLayoutDestroy(<s[s])); 2533 PetscCall(PetscFree(inds[s])); 2534 } 2535 PetscCall(PetscFree3(lts, nls, inds)); 2536 nRoots = glayout->N; 2537 } break; 2538 case PETSCSF_CONCATENATE_ROOTMODE_LOCAL: 2539 /* nRoots calculated later in this case */ 2540 break; 2541 default: 2542 SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode); 2543 } 2544 2545 if (!leafOffsets) { 2546 all_ilocal_null = PETSC_TRUE; 2547 for (s = 0; s < nsfs; s++) { 2548 const PetscInt *ilocal; 2549 2550 PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL)); 2551 if (ilocal) { 2552 all_ilocal_null = PETSC_FALSE; 2553 break; 2554 } 2555 } 2556 PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL"); 2557 } 2558 2559 /* Renumber and concatenate local leaves */ 2560 ilocal_new = NULL; 2561 if (!all_ilocal_null) { 2562 PetscCall(PetscMalloc1(nLeaves, &ilocal_new)); 2563 for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1; 2564 for (s = 0; s < nsfs; s++) { 2565 const PetscInt *ilocal; 2566 PetscInt *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]); 2567 PetscInt i, nleaves_l; 2568 2569 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL)); 2570 for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s]; 2571 } 2572 } 2573 2574 /* Renumber and concatenate remote roots */ 2575 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) { 2576 PetscInt rootOffset = 0; 2577 2578 PetscCall(PetscMalloc1(nLeaves, &iremote_new)); 2579 for (i = 0; i < nLeaves; i++) { 2580 iremote_new[i].rank = -1; 2581 iremote_new[i].index = -1; 2582 } 2583 for (s = 0; s < nsfs; s++) { 2584 PetscInt i, nl, nr; 2585 PetscSF tmp_sf; 2586 const PetscSFNode *iremote; 2587 PetscSFNode *tmp_rootdata; 2588 PetscSFNode *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]); 2589 2590 PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote)); 2591 PetscCall(PetscSFCreate(comm, &tmp_sf)); 2592 /* create helper SF with contiguous leaves */ 2593 PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 2594 PetscCall(PetscSFSetUp(tmp_sf)); 2595 PetscCall(PetscMalloc1(nr, &tmp_rootdata)); 2596 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) { 2597 for (i = 0; i < nr; i++) { 2598 tmp_rootdata[i].index = i + rootOffset; 2599 tmp_rootdata[i].rank = rank; 2600 } 2601 rootOffset += nr; 2602 } else { 2603 for (i = 0; i < nr; i++) { 2604 tmp_rootdata[i].index = i; 2605 tmp_rootdata[i].rank = rank; 2606 } 2607 } 2608 PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2609 PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2610 PetscCall(PetscSFDestroy(&tmp_sf)); 2611 PetscCall(PetscFree(tmp_rootdata)); 2612 } 2613 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above 2614 2615 /* Build the new SF */ 2616 PetscCall(PetscSFCreate(comm, newsf)); 2617 PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER)); 2618 } else { 2619 /* Build the new SF */ 2620 PetscCall(PetscSFCreate(comm, newsf)); 2621 PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote)); 2622 } 2623 PetscCall(PetscSFSetUp(*newsf)); 2624 PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view")); 2625 PetscCall(PetscLayoutDestroy(&glayout)); 2626 PetscCall(PetscFree(gremote)); 2627 PetscCall(PetscFree(leafArrayOffsets)); 2628 PetscFunctionReturn(PETSC_SUCCESS); 2629 } 2630 2631 /*@ 2632 PetscSFRegisterPersistent - Register root and leaf data as memory regions that will be used for repeated PetscSF communications. 2633 2634 Collective 2635 2636 Input Parameters: 2637 + sf - star forest 2638 . unit - the data type contained within the root and leaf data 2639 . rootdata - root data that will be used for multiple PetscSF communications 2640 - leafdata - leaf data that will be used for multiple PetscSF communications 2641 2642 Level: advanced 2643 2644 Notes: 2645 Implementations of `PetscSF` can make optimizations 2646 for repeated communication using the same memory regions, but these optimizations 2647 can be unsound if `rootdata` or `leafdata` is deallocated and the `PetscSF` is not informed. 2648 The intended pattern is 2649 2650 .vb 2651 PetscMalloc2(nroots, &rootdata, nleaves, &leafdata); 2652 2653 PetscSFRegisterPersistent(sf, unit, rootdata, leafdata); 2654 // repeated use of rootdata and leafdata will now be optimized 2655 2656 PetscSFBcastBegin(sf, unit, rootdata, leafdata, MPI_REPLACE); 2657 PetscSFBcastEnd(sf, unit, rootdata, leafdata, MPI_REPLACE); 2658 // ... 2659 PetscSFReduceBegin(sf, unit, leafdata, rootdata, MPI_SUM); 2660 PetscSFReduceEnd(sf, unit, leafdata, rootdata, MPI_SUM); 2661 // ... (other communications) 2662 2663 // rootdata and leafdata must be deregistered before freeing 2664 // skipping this can lead to undefined behavior including 2665 // deadlocks 2666 PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata); 2667 2668 // it is now safe to free rootdata and leafdata 2669 PetscFree2(rootdata, leafdata); 2670 .ve 2671 2672 If you do not register `rootdata` and `leafdata` it will not cause an error, 2673 but optimizations that reduce the setup time for each communication cannot be 2674 made. Currently, the only implementation of `PetscSF` that benefits from 2675 `PetscSFRegisterPersistent()` is `PETSCSFWINDOW`. For the default 2676 `PETSCSFBASIC` there is no benefit to using `PetscSFRegisterPersistent()`. 2677 2678 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFDeregisterPersistent()` 2679 @*/ 2680 PetscErrorCode PetscSFRegisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata) 2681 { 2682 PetscFunctionBegin; 2683 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2684 PetscTryMethod(sf, "PetscSFRegisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata)); 2685 PetscFunctionReturn(PETSC_SUCCESS); 2686 } 2687 2688 /*@ 2689 PetscSFDeregisterPersistent - Signal that repeated usage of root and leaf data for PetscSF communication has concluded. 2690 2691 Collective 2692 2693 Input Parameters: 2694 + sf - star forest 2695 . unit - the data type contained within the root and leaf data 2696 . rootdata - root data that was previously registered with `PetscSFRegisterPersistent()` 2697 - leafdata - leaf data that was previously registered with `PetscSFRegisterPersistent()` 2698 2699 Level: advanced 2700 2701 Note: 2702 See `PetscSFRegisterPersistent()` for when/how to use this function. 2703 2704 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFRegisterPersistent()` 2705 @*/ 2706 PetscErrorCode PetscSFDeregisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata) 2707 { 2708 PetscFunctionBegin; 2709 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2710 PetscTryMethod(sf, "PetscSFDeregisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata)); 2711 PetscFunctionReturn(PETSC_SUCCESS); 2712 } 2713 2714 PETSC_INTERN PetscErrorCode PetscSFGetDatatypeSize_Internal(MPI_Comm comm, MPI_Datatype unit, MPI_Aint *size) 2715 { 2716 MPI_Aint lb, lb_true, bytes, bytes_true; 2717 2718 PetscFunctionBegin; 2719 PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes)); 2720 PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true)); 2721 PetscCheck(lb == 0 && lb_true == 0, comm, PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature"); 2722 *size = bytes; 2723 PetscFunctionReturn(PETSC_SUCCESS); 2724 } 2725