1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/hashseti.h> 3 #include <petsc/private/viewerimpl.h> 4 #include <petsc/private/hashmapi.h> 5 6 #if defined(PETSC_HAVE_CUDA) 7 #include <cuda_runtime.h> 8 #include <petscdevice_cuda.h> 9 #endif 10 11 #if defined(PETSC_HAVE_HIP) 12 #include <hip/hip_runtime.h> 13 #endif 14 15 #if defined(PETSC_CLANG_STATIC_ANALYZER) 16 extern void PetscSFCheckGraphSet(PetscSF, int); 17 #else 18 #if defined(PETSC_USE_DEBUG) 19 #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME) 20 #else 21 #define PetscSFCheckGraphSet(sf, arg) \ 22 do { \ 23 } while (0) 24 #endif 25 #endif 26 27 const char *const PetscSFDuplicateOptions[] = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL}; 28 const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL}; 29 30 /*@ 31 PetscSFCreate - create a star forest communication context 32 33 Collective 34 35 Input Parameter: 36 . comm - communicator on which the star forest will operate 37 38 Output Parameter: 39 . sf - new star forest context 40 41 Options Database Key: 42 + -sf_type basic - Use MPI persistent Isend/Irecv for communication (Default) 43 . -sf_type window - Use MPI-3 one-sided window for communication 44 . -sf_type neighbor - Use MPI-3 neighborhood collectives for communication 45 - -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor) 46 47 Level: intermediate 48 49 Note: 50 When one knows the communication graph is one of the predefined graph, such as `MPI_Alltoall()`, `MPI_Allgatherv()`, 51 `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special 52 `SF`s are optimized and they have better performance than the general `SF`s. 53 54 .seealso: `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()` 55 @*/ 56 PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf) 57 { 58 PetscSF b; 59 60 PetscFunctionBegin; 61 PetscAssertPointer(sf, 2); 62 PetscCall(PetscSFInitializePackage()); 63 64 PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView)); 65 b->nroots = -1; 66 b->nleaves = -1; 67 b->minleaf = PETSC_INT_MAX; 68 b->maxleaf = PETSC_INT_MIN; 69 b->nranks = -1; 70 b->rankorder = PETSC_TRUE; 71 b->ingroup = MPI_GROUP_NULL; 72 b->outgroup = MPI_GROUP_NULL; 73 b->graphset = PETSC_FALSE; 74 #if defined(PETSC_HAVE_DEVICE) 75 b->use_gpu_aware_mpi = use_gpu_aware_mpi; 76 b->use_stream_aware_mpi = PETSC_FALSE; 77 b->unknown_input_stream = PETSC_FALSE; 78 #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 79 b->backend = PETSCSF_BACKEND_KOKKOS; 80 #elif defined(PETSC_HAVE_CUDA) 81 b->backend = PETSCSF_BACKEND_CUDA; 82 #elif defined(PETSC_HAVE_HIP) 83 b->backend = PETSCSF_BACKEND_HIP; 84 #endif 85 86 #if defined(PETSC_HAVE_NVSHMEM) 87 b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */ 88 b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */ 89 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL)); 90 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL)); 91 #endif 92 #endif 93 b->vscat.from_n = -1; 94 b->vscat.to_n = -1; 95 b->vscat.unit = MPIU_SCALAR; 96 *sf = b; 97 PetscFunctionReturn(PETSC_SUCCESS); 98 } 99 100 /*@ 101 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 102 103 Collective 104 105 Input Parameter: 106 . sf - star forest 107 108 Level: advanced 109 110 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()` 111 @*/ 112 PetscErrorCode PetscSFReset(PetscSF sf) 113 { 114 PetscFunctionBegin; 115 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 116 PetscTryTypeMethod(sf, Reset); 117 PetscCall(PetscSFDestroy(&sf->rankssf)); 118 119 sf->nroots = -1; 120 sf->nleaves = -1; 121 sf->minleaf = PETSC_INT_MAX; 122 sf->maxleaf = PETSC_INT_MIN; 123 sf->mine = NULL; 124 sf->remote = NULL; 125 sf->graphset = PETSC_FALSE; 126 PetscCall(PetscFree(sf->mine_alloc)); 127 PetscCall(PetscFree(sf->remote_alloc)); 128 sf->nranks = -1; 129 PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote)); 130 sf->degreeknown = PETSC_FALSE; 131 PetscCall(PetscFree(sf->degree)); 132 if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup)); 133 if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup)); 134 135 if (sf->multi) sf->multi->multi = NULL; 136 PetscCall(PetscSFDestroy(&sf->multi)); 137 138 PetscCall(PetscLayoutDestroy(&sf->map)); 139 140 #if defined(PETSC_HAVE_DEVICE) 141 for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i])); 142 #endif 143 144 sf->setupcalled = PETSC_FALSE; 145 PetscFunctionReturn(PETSC_SUCCESS); 146 } 147 148 /*@ 149 PetscSFSetType - Set the `PetscSF` communication implementation 150 151 Collective 152 153 Input Parameters: 154 + sf - the `PetscSF` context 155 - type - a known method 156 .vb 157 PETSCSFWINDOW - MPI-2/3 one-sided 158 PETSCSFBASIC - basic implementation using MPI-1 two-sided 159 .ve 160 161 Options Database Key: 162 . -sf_type <type> - Sets the method; for example `basic` or `window` use -help for a list of available methods 163 164 Level: intermediate 165 166 Notes: 167 See `PetscSFType` for possible values 168 169 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()` 170 @*/ 171 PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type) 172 { 173 PetscBool match; 174 PetscErrorCode (*r)(PetscSF); 175 176 PetscFunctionBegin; 177 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 178 PetscAssertPointer(type, 2); 179 180 PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match)); 181 if (match) PetscFunctionReturn(PETSC_SUCCESS); 182 183 PetscCall(PetscFunctionListFind(PetscSFList, type, &r)); 184 PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type); 185 /* Destroy the previous PetscSF implementation context */ 186 PetscTryTypeMethod(sf, Destroy); 187 PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops))); 188 PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type)); 189 PetscCall((*r)(sf)); 190 PetscFunctionReturn(PETSC_SUCCESS); 191 } 192 193 /*@ 194 PetscSFGetType - Get the `PetscSF` communication implementation 195 196 Not Collective 197 198 Input Parameter: 199 . sf - the `PetscSF` context 200 201 Output Parameter: 202 . type - the `PetscSF` type name 203 204 Level: intermediate 205 206 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()` 207 @*/ 208 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 209 { 210 PetscFunctionBegin; 211 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 212 PetscAssertPointer(type, 2); 213 *type = ((PetscObject)sf)->type_name; 214 PetscFunctionReturn(PETSC_SUCCESS); 215 } 216 217 /*@ 218 PetscSFDestroy - destroy a star forest 219 220 Collective 221 222 Input Parameter: 223 . sf - address of star forest 224 225 Level: intermediate 226 227 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()` 228 @*/ 229 PetscErrorCode PetscSFDestroy(PetscSF *sf) 230 { 231 PetscFunctionBegin; 232 if (!*sf) PetscFunctionReturn(PETSC_SUCCESS); 233 PetscValidHeaderSpecific(*sf, PETSCSF_CLASSID, 1); 234 if (--((PetscObject)*sf)->refct > 0) { 235 *sf = NULL; 236 PetscFunctionReturn(PETSC_SUCCESS); 237 } 238 PetscCall(PetscSFReset(*sf)); 239 PetscTryTypeMethod(*sf, Destroy); 240 PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf)); 241 if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit)); 242 #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM) 243 if ((*sf)->use_stream_aware_mpi) { 244 PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream)); 245 PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm)); 246 } 247 #endif 248 PetscCall(PetscHeaderDestroy(sf)); 249 PetscFunctionReturn(PETSC_SUCCESS); 250 } 251 252 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 253 { 254 PetscInt i, nleaves; 255 PetscMPIInt size; 256 const PetscInt *ilocal; 257 const PetscSFNode *iremote; 258 259 PetscFunctionBegin; 260 if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS); 261 PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote)); 262 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 263 for (i = 0; i < nleaves; i++) { 264 const PetscInt rank = iremote[i].rank; 265 const PetscInt remote = iremote[i].index; 266 const PetscInt leaf = ilocal ? ilocal[i] : i; 267 PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size); 268 PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i); 269 PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i); 270 } 271 PetscFunctionReturn(PETSC_SUCCESS); 272 } 273 274 /*@ 275 PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication 276 277 Collective 278 279 Input Parameter: 280 . sf - star forest communication object 281 282 Level: beginner 283 284 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()` 285 @*/ 286 PetscErrorCode PetscSFSetUp(PetscSF sf) 287 { 288 PetscFunctionBegin; 289 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 290 PetscSFCheckGraphSet(sf, 1); 291 if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS); 292 PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0)); 293 PetscCall(PetscSFCheckGraphValid_Private(sf)); 294 if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */ 295 PetscTryTypeMethod(sf, SetUp); 296 #if defined(PETSC_HAVE_CUDA) 297 if (sf->backend == PETSCSF_BACKEND_CUDA) { 298 sf->ops->Malloc = PetscSFMalloc_CUDA; 299 sf->ops->Free = PetscSFFree_CUDA; 300 } 301 #endif 302 #if defined(PETSC_HAVE_HIP) 303 if (sf->backend == PETSCSF_BACKEND_HIP) { 304 sf->ops->Malloc = PetscSFMalloc_HIP; 305 sf->ops->Free = PetscSFFree_HIP; 306 } 307 #endif 308 309 #if defined(PETSC_HAVE_KOKKOS) 310 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 311 sf->ops->Malloc = PetscSFMalloc_Kokkos; 312 sf->ops->Free = PetscSFFree_Kokkos; 313 } 314 #endif 315 PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0)); 316 sf->setupcalled = PETSC_TRUE; 317 PetscFunctionReturn(PETSC_SUCCESS); 318 } 319 320 /*@ 321 PetscSFSetFromOptions - set `PetscSF` options using the options database 322 323 Logically Collective 324 325 Input Parameter: 326 . sf - star forest 327 328 Options Database Keys: 329 + -sf_type - implementation type, see `PetscSFSetType()` 330 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 331 . -sf_use_default_stream - Assume callers of `PetscSF` computed the input root/leafdata with the default CUDA stream. `PetscSF` will also 332 use the default stream to process data. Therefore, no stream synchronization is needed between `PetscSF` and its caller (default: true). 333 If true, this option only works with `-use_gpu_aware_mpi 1`. 334 . -sf_use_stream_aware_mpi - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false). 335 If true, this option only works with `-use_gpu_aware_mpi 1`. 336 337 - -sf_backend <cuda,hip,kokkos> - Select the device backend`PetscSF` uses. Currently `PetscSF` has these backends: cuda - hip and Kokkos. 338 On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices, 339 the only available is kokkos. 340 341 Level: intermediate 342 343 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()` 344 @*/ 345 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 346 { 347 PetscSFType deft; 348 char type[256]; 349 PetscBool flg; 350 351 PetscFunctionBegin; 352 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 353 PetscObjectOptionsBegin((PetscObject)sf); 354 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 355 PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg)); 356 PetscCall(PetscSFSetType(sf, flg ? type : deft)); 357 PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL)); 358 #if defined(PETSC_HAVE_DEVICE) 359 { 360 char backendstr[32] = {0}; 361 PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set; 362 /* Change the defaults set in PetscSFCreate() with command line options */ 363 PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL)); 364 PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL)); 365 PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set)); 366 PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda)); 367 PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos)); 368 PetscCall(PetscStrcasecmp("hip", backendstr, &isHip)); 369 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 370 if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 371 else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 372 else if (isHip) sf->backend = PETSCSF_BACKEND_HIP; 373 else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr); 374 #elif defined(PETSC_HAVE_KOKKOS) 375 PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr); 376 #endif 377 378 #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM) 379 if (sf->use_stream_aware_mpi) { 380 MPI_Info info; 381 382 PetscCallMPI(MPI_Info_create(&info)); 383 PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t")); 384 PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream))); 385 PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream)); 386 PetscCallMPI(MPI_Info_free(&info)); 387 PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm)); 388 } 389 #endif 390 } 391 #endif 392 PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject); 393 PetscOptionsEnd(); 394 PetscFunctionReturn(PETSC_SUCCESS); 395 } 396 397 /*@ 398 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 399 400 Logically Collective 401 402 Input Parameters: 403 + sf - star forest 404 - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic) 405 406 Level: advanced 407 408 .seealso: `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()` 409 @*/ 410 PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg) 411 { 412 PetscFunctionBegin; 413 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 414 PetscValidLogicalCollectiveBool(sf, flg, 2); 415 PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 416 sf->rankorder = flg; 417 PetscFunctionReturn(PETSC_SUCCESS); 418 } 419 420 /*@ 421 PetscSFSetGraph - Set a parallel star forest 422 423 Collective 424 425 Input Parameters: 426 + sf - star forest 427 . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 428 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 429 . ilocal - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage (locations must be >= 0, enforced 430 during setup in debug mode) 431 . localmode - copy mode for `ilocal` 432 . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 433 during setup in debug mode) 434 - remotemode - copy mode for `iremote` 435 436 Level: intermediate 437 438 Notes: 439 Leaf indices in `ilocal` must be unique, otherwise an error occurs. 440 441 Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics. 442 In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`, 443 PETSc might modify the respective array; 444 if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`. 445 Only if `PETSC_COPY_VALUES` is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed). 446 447 Fortran Notes: 448 In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`. 449 450 Developer Notes: 451 We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf. 452 This also allows to compare leaf sets of two `PetscSF`s easily. 453 454 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()` 455 @*/ 456 PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode) 457 { 458 PetscBool unique, contiguous; 459 460 PetscFunctionBegin; 461 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 462 if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4); 463 if (nleaves > 0) PetscAssertPointer(iremote, 6); 464 PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots); 465 PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves); 466 /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast 467 * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */ 468 PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode); 469 PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode); 470 471 if (sf->nroots >= 0) { /* Reset only if graph already set */ 472 PetscCall(PetscSFReset(sf)); 473 } 474 475 PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0)); 476 if (PetscDefined(USE_DEBUG)) { 477 PetscMPIInt size; 478 479 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 480 for (PetscInt i = 0; i < nleaves; i++) { PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values"); } 481 } 482 483 sf->nroots = nroots; 484 sf->nleaves = nleaves; 485 486 if (localmode == PETSC_COPY_VALUES && ilocal) { 487 PetscInt *tlocal = NULL; 488 489 PetscCall(PetscMalloc1(nleaves, &tlocal)); 490 PetscCall(PetscArraycpy(tlocal, ilocal, nleaves)); 491 ilocal = tlocal; 492 } 493 if (remotemode == PETSC_COPY_VALUES) { 494 PetscSFNode *tremote = NULL; 495 496 PetscCall(PetscMalloc1(nleaves, &tremote)); 497 PetscCall(PetscArraycpy(tremote, iremote, nleaves)); 498 iremote = tremote; 499 } 500 501 if (nleaves && ilocal) { 502 PetscSFNode work; 503 504 PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work)); 505 PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique)); 506 unique = PetscNot(unique); 507 PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF"); 508 sf->minleaf = ilocal[0]; 509 sf->maxleaf = ilocal[nleaves - 1]; 510 contiguous = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1); 511 } else { 512 sf->minleaf = 0; 513 sf->maxleaf = nleaves - 1; 514 unique = PETSC_TRUE; 515 contiguous = PETSC_TRUE; 516 } 517 518 if (contiguous) { 519 if (localmode == PETSC_USE_POINTER) { 520 ilocal = NULL; 521 } else { 522 PetscCall(PetscFree(ilocal)); 523 } 524 } 525 sf->mine = ilocal; 526 if (localmode == PETSC_USE_POINTER) { 527 sf->mine_alloc = NULL; 528 } else { 529 sf->mine_alloc = ilocal; 530 } 531 if (PetscDefined(USE_DEBUG)) { 532 PetscMPIInt size; 533 534 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 535 for (PetscInt i = 0; i < nleaves; i++) { PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values"); } 536 } 537 sf->remote = iremote; 538 if (remotemode == PETSC_USE_POINTER) { 539 sf->remote_alloc = NULL; 540 } else { 541 sf->remote_alloc = iremote; 542 } 543 PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0)); 544 sf->graphset = PETSC_TRUE; 545 PetscFunctionReturn(PETSC_SUCCESS); 546 } 547 548 /*@ 549 PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern 550 551 Collective 552 553 Input Parameters: 554 + sf - The `PetscSF` 555 . map - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`) 556 - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL` 557 558 Level: intermediate 559 560 Notes: 561 It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `x` and its `PetscLayout` is `map`. 562 `n` and `N` are the local and global sizes of `x` respectively. 563 564 With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to 565 sequential vectors `y` on all MPI processes. 566 567 With `PETSCSF_PATTERN_GATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to a 568 sequential vector `y` on rank 0. 569 570 In above cases, entries of `x` are roots and entries of `y` are leaves. 571 572 With `PETSCSF_PATTERN_ALLTOALL`, map is insignificant. Suppose NP is size of `sf`'s communicator. The routine 573 creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 574 of rank j. Here 0 <=i,j<NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that it does 575 not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data 576 items with `MPI_Type_contiguous` and use that as the <unit> argument in SF routines. 577 578 In this case, roots and leaves are symmetric. 579 580 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()` 581 @*/ 582 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern) 583 { 584 MPI_Comm comm; 585 PetscInt n, N, res[2]; 586 PetscMPIInt rank, size; 587 PetscSFType type; 588 589 PetscFunctionBegin; 590 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 591 if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2); 592 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 593 PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern); 594 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 595 PetscCallMPI(MPI_Comm_size(comm, &size)); 596 597 if (pattern == PETSCSF_PATTERN_ALLTOALL) { 598 type = PETSCSFALLTOALL; 599 PetscCall(PetscLayoutCreate(comm, &sf->map)); 600 PetscCall(PetscLayoutSetLocalSize(sf->map, size)); 601 PetscCall(PetscLayoutSetSize(sf->map, (PetscInt)size * size)); 602 PetscCall(PetscLayoutSetUp(sf->map)); 603 } else { 604 PetscCall(PetscLayoutGetLocalSize(map, &n)); 605 PetscCall(PetscLayoutGetSize(map, &N)); 606 res[0] = n; 607 res[1] = -n; 608 /* Check if n are same over all ranks so that we can optimize it */ 609 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm)); 610 if (res[0] == -res[1]) { /* same n */ 611 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 612 } else { 613 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 614 } 615 PetscCall(PetscLayoutReference(map, &sf->map)); 616 } 617 PetscCall(PetscSFSetType(sf, type)); 618 619 sf->pattern = pattern; 620 sf->mine = NULL; /* Contiguous */ 621 622 /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 623 Also set other easy stuff. 624 */ 625 if (pattern == PETSCSF_PATTERN_ALLGATHER) { 626 sf->nleaves = N; 627 sf->nroots = n; 628 sf->nranks = size; 629 sf->minleaf = 0; 630 sf->maxleaf = N - 1; 631 } else if (pattern == PETSCSF_PATTERN_GATHER) { 632 sf->nleaves = rank ? 0 : N; 633 sf->nroots = n; 634 sf->nranks = rank ? 0 : size; 635 sf->minleaf = 0; 636 sf->maxleaf = rank ? -1 : N - 1; 637 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 638 sf->nleaves = size; 639 sf->nroots = size; 640 sf->nranks = size; 641 sf->minleaf = 0; 642 sf->maxleaf = size - 1; 643 } 644 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 645 sf->graphset = PETSC_TRUE; 646 PetscFunctionReturn(PETSC_SUCCESS); 647 } 648 649 /*@ 650 PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map 651 652 Collective 653 654 Input Parameter: 655 . sf - star forest to invert 656 657 Output Parameter: 658 . isf - inverse of `sf` 659 660 Level: advanced 661 662 Notes: 663 All roots must have degree 1. 664 665 The local space may be a permutation, but cannot be sparse. 666 667 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetGraph()` 668 @*/ 669 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf) 670 { 671 PetscMPIInt rank; 672 PetscInt i, nroots, nleaves, maxlocal, count, *newilocal; 673 const PetscInt *ilocal; 674 PetscSFNode *roots, *leaves; 675 676 PetscFunctionBegin; 677 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 678 PetscSFCheckGraphSet(sf, 1); 679 PetscAssertPointer(isf, 2); 680 681 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL)); 682 maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */ 683 684 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 685 PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves)); 686 for (i = 0; i < maxlocal; i++) { 687 leaves[i].rank = rank; 688 leaves[i].index = i; 689 } 690 for (i = 0; i < nroots; i++) { 691 roots[i].rank = -1; 692 roots[i].index = -1; 693 } 694 PetscCall(PetscSFReduceBegin(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE)); 695 PetscCall(PetscSFReduceEnd(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE)); 696 697 /* Check whether our leaves are sparse */ 698 for (i = 0, count = 0; i < nroots; i++) 699 if (roots[i].rank >= 0) count++; 700 if (count == nroots) newilocal = NULL; 701 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal)); 702 for (i = 0, count = 0; i < nroots; i++) { 703 if (roots[i].rank >= 0) { 704 newilocal[count] = i; 705 roots[count].rank = roots[i].rank; 706 roots[count].index = roots[i].index; 707 count++; 708 } 709 } 710 } 711 712 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf)); 713 PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES)); 714 PetscCall(PetscFree2(roots, leaves)); 715 PetscFunctionReturn(PETSC_SUCCESS); 716 } 717 718 /*@ 719 PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph 720 721 Collective 722 723 Input Parameters: 724 + sf - communication object to duplicate 725 - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`) 726 727 Output Parameter: 728 . newsf - new communication object 729 730 Level: beginner 731 732 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()` 733 @*/ 734 PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf) 735 { 736 PetscSFType type; 737 MPI_Datatype dtype = MPIU_SCALAR; 738 739 PetscFunctionBegin; 740 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 741 PetscValidLogicalCollectiveEnum(sf, opt, 2); 742 PetscAssertPointer(newsf, 3); 743 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf)); 744 PetscCall(PetscSFGetType(sf, &type)); 745 if (type) PetscCall(PetscSFSetType(*newsf, type)); 746 (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */ 747 if (opt == PETSCSF_DUPLICATE_GRAPH) { 748 PetscSFCheckGraphSet(sf, 1); 749 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 750 PetscInt nroots, nleaves; 751 const PetscInt *ilocal; 752 const PetscSFNode *iremote; 753 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 754 PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 755 } else { 756 PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern)); 757 } 758 } 759 /* Since oldtype is committed, so is newtype, according to MPI */ 760 if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype)); 761 (*newsf)->vscat.bs = sf->vscat.bs; 762 (*newsf)->vscat.unit = dtype; 763 (*newsf)->vscat.to_n = sf->vscat.to_n; 764 (*newsf)->vscat.from_n = sf->vscat.from_n; 765 /* Do not copy lsf. Build it on demand since it is rarely used */ 766 767 #if defined(PETSC_HAVE_DEVICE) 768 (*newsf)->backend = sf->backend; 769 (*newsf)->unknown_input_stream = sf->unknown_input_stream; 770 (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi; 771 (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi; 772 #endif 773 PetscTryTypeMethod(sf, Duplicate, opt, *newsf); 774 /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */ 775 PetscFunctionReturn(PETSC_SUCCESS); 776 } 777 778 /*@C 779 PetscSFGetGraph - Get the graph specifying a parallel star forest 780 781 Not Collective 782 783 Input Parameter: 784 . sf - star forest 785 786 Output Parameters: 787 + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 788 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 789 . ilocal - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage) 790 - iremote - remote locations of root vertices for each leaf on the current process 791 792 Level: intermediate 793 794 Notes: 795 We are not currently requiring that the graph is set, thus returning `nroots` = -1 if it has not been set yet 796 797 The returned `ilocal` and `iremote` might contain values in different order than the input ones in `PetscSFSetGraph()` 798 799 Fortran Notes: 800 The returned `iremote` array is a copy and must be deallocated after use. Consequently, if you 801 want to update the graph, you must call `PetscSFSetGraph()` after modifying the `iremote` array. 802 803 To check for a `NULL` `ilocal` use 804 $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then 805 806 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()` 807 @*/ 808 PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote) 809 { 810 PetscFunctionBegin; 811 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 812 if (sf->ops->GetGraph) { 813 PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote)); 814 } else { 815 if (nroots) *nroots = sf->nroots; 816 if (nleaves) *nleaves = sf->nleaves; 817 if (ilocal) *ilocal = sf->mine; 818 if (iremote) *iremote = sf->remote; 819 } 820 PetscFunctionReturn(PETSC_SUCCESS); 821 } 822 823 /*@ 824 PetscSFGetLeafRange - Get the active leaf ranges 825 826 Not Collective 827 828 Input Parameter: 829 . sf - star forest 830 831 Output Parameters: 832 + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves. 833 - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves. 834 835 Level: developer 836 837 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 838 @*/ 839 PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf) 840 { 841 PetscFunctionBegin; 842 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 843 PetscSFCheckGraphSet(sf, 1); 844 if (minleaf) *minleaf = sf->minleaf; 845 if (maxleaf) *maxleaf = sf->maxleaf; 846 PetscFunctionReturn(PETSC_SUCCESS); 847 } 848 849 /*@ 850 PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database 851 852 Collective 853 854 Input Parameters: 855 + A - the star forest 856 . obj - Optional object that provides the prefix for the option names 857 - name - command line option 858 859 Level: intermediate 860 861 Note: 862 See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat` 863 864 .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()` 865 @*/ 866 PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[]) 867 { 868 PetscFunctionBegin; 869 PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1); 870 PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name)); 871 PetscFunctionReturn(PETSC_SUCCESS); 872 } 873 874 /*@ 875 PetscSFView - view a star forest 876 877 Collective 878 879 Input Parameters: 880 + sf - star forest 881 - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD` 882 883 Level: beginner 884 885 .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()` 886 @*/ 887 PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer) 888 { 889 PetscBool iascii; 890 PetscViewerFormat format; 891 892 PetscFunctionBegin; 893 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 894 if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer)); 895 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 896 PetscCheckSameComm(sf, 1, viewer, 2); 897 if (sf->graphset) PetscCall(PetscSFSetUp(sf)); 898 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 899 if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) { 900 PetscMPIInt rank; 901 PetscInt j; 902 903 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer)); 904 PetscCall(PetscViewerASCIIPushTab(viewer)); 905 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 906 if (!sf->graphset) { 907 PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n")); 908 PetscCall(PetscViewerASCIIPopTab(viewer)); 909 PetscFunctionReturn(PETSC_SUCCESS); 910 } 911 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 912 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 913 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%d\n", rank, sf->nroots, sf->nleaves, sf->nranks)); 914 for (PetscInt i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%d,%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, (PetscMPIInt)sf->remote[i].rank, sf->remote[i].index)); 915 PetscCall(PetscViewerFlush(viewer)); 916 PetscCall(PetscViewerGetFormat(viewer, &format)); 917 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 918 PetscMPIInt *tmpranks, *perm; 919 920 PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm)); 921 PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks)); 922 for (PetscMPIInt i = 0; i < sf->nranks; i++) perm[i] = i; 923 PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm)); 924 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank)); 925 for (PetscMPIInt ii = 0; ii < sf->nranks; ii++) { 926 PetscMPIInt i = perm[ii]; 927 928 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i])); 929 for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j])); 930 } 931 PetscCall(PetscFree2(tmpranks, perm)); 932 } 933 PetscCall(PetscViewerFlush(viewer)); 934 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 935 } 936 PetscCall(PetscViewerASCIIPopTab(viewer)); 937 } 938 PetscTryTypeMethod(sf, View, viewer); 939 PetscFunctionReturn(PETSC_SUCCESS); 940 } 941 942 /*@C 943 PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process 944 945 Not Collective 946 947 Input Parameter: 948 . sf - star forest 949 950 Output Parameters: 951 + nranks - number of ranks referenced by local part 952 . ranks - [`nranks`] array of ranks 953 . roffset - [`nranks`+1] offset in `rmine`/`rremote` for each rank 954 . rmine - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote rank, or `NULL` 955 - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote rank, or `NULL` 956 957 Level: developer 958 959 .seealso: `PetscSF`, `PetscSFGetLeafRanks()` 960 @*/ 961 PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscMPIInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote) 962 { 963 PetscFunctionBegin; 964 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 965 PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 966 if (sf->ops->GetRootRanks) { 967 PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote); 968 } else { 969 /* The generic implementation */ 970 if (nranks) *nranks = sf->nranks; 971 if (ranks) *ranks = sf->ranks; 972 if (roffset) *roffset = sf->roffset; 973 if (rmine) *rmine = sf->rmine; 974 if (rremote) *rremote = sf->rremote; 975 } 976 PetscFunctionReturn(PETSC_SUCCESS); 977 } 978 979 /*@C 980 PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 981 982 Not Collective 983 984 Input Parameter: 985 . sf - star forest 986 987 Output Parameters: 988 + niranks - number of leaf ranks referencing roots on this process 989 . iranks - [`niranks`] array of ranks 990 . ioffset - [`niranks`+1] offset in `irootloc` for each rank 991 - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf rank 992 993 Level: developer 994 995 .seealso: `PetscSF`, `PetscSFGetRootRanks()` 996 @*/ 997 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscMPIInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc) 998 { 999 PetscFunctionBegin; 1000 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1001 PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 1002 if (sf->ops->GetLeafRanks) { 1003 PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc); 1004 } else { 1005 PetscSFType type; 1006 PetscCall(PetscSFGetType(sf, &type)); 1007 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 1008 } 1009 PetscFunctionReturn(PETSC_SUCCESS); 1010 } 1011 1012 static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list) 1013 { 1014 PetscInt i; 1015 for (i = 0; i < n; i++) { 1016 if (needle == list[i]) return PETSC_TRUE; 1017 } 1018 return PETSC_FALSE; 1019 } 1020 1021 /*@C 1022 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations. 1023 1024 Collective 1025 1026 Input Parameters: 1027 + sf - `PetscSF` to set up; `PetscSFSetGraph()` must have been called 1028 - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange) 1029 1030 Level: developer 1031 1032 .seealso: `PetscSF`, `PetscSFGetRootRanks()` 1033 @*/ 1034 PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup) 1035 { 1036 PetscHMapI table; 1037 PetscHashIter pos; 1038 PetscMPIInt size, groupsize, *groupranks, *ranks; 1039 PetscInt *rcount; 1040 PetscInt irank, sfnrank, ranksi; 1041 PetscMPIInt i, orank = -1; 1042 1043 PetscFunctionBegin; 1044 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1045 PetscSFCheckGraphSet(sf, 1); 1046 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 1047 PetscCall(PetscHMapICreateWithSize(10, &table)); 1048 for (i = 0; i < sf->nleaves; i++) { 1049 /* Log 1-based rank */ 1050 PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES)); 1051 } 1052 PetscCall(PetscHMapIGetSize(table, &sfnrank)); 1053 PetscCall(PetscMPIIntCast(sfnrank, &sf->nranks)); 1054 PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote)); 1055 PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks)); 1056 PetscHashIterBegin(table, pos); 1057 for (i = 0; i < sf->nranks; i++) { 1058 PetscHashIterGetKey(table, pos, ranksi); 1059 PetscCall(PetscMPIIntCast(ranksi, &ranks[i])); 1060 PetscHashIterGetVal(table, pos, rcount[i]); 1061 PetscHashIterNext(table, pos); 1062 ranks[i]--; /* Convert back to 0-based */ 1063 } 1064 PetscCall(PetscHMapIDestroy(&table)); 1065 1066 /* We expect that dgroup is reliably "small" while nranks could be large */ 1067 { 1068 MPI_Group group = MPI_GROUP_NULL; 1069 PetscMPIInt *dgroupranks; 1070 1071 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 1072 PetscCallMPI(MPI_Group_size(dgroup, &groupsize)); 1073 PetscCall(PetscMalloc1(groupsize, &dgroupranks)); 1074 PetscCall(PetscMalloc1(groupsize, &groupranks)); 1075 for (i = 0; i < groupsize; i++) dgroupranks[i] = i; 1076 if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks)); 1077 PetscCallMPI(MPI_Group_free(&group)); 1078 PetscCall(PetscFree(dgroupranks)); 1079 } 1080 1081 /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */ 1082 for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) { 1083 for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */ 1084 if (InList(ranks[i], groupsize, groupranks)) break; 1085 } 1086 for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */ 1087 if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break; 1088 } 1089 if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */ 1090 PetscMPIInt tmprank; 1091 PetscInt tmpcount; 1092 1093 tmprank = ranks[i]; 1094 tmpcount = rcount[i]; 1095 ranks[i] = ranks[sf->ndranks]; 1096 rcount[i] = rcount[sf->ndranks]; 1097 ranks[sf->ndranks] = tmprank; 1098 rcount[sf->ndranks] = tmpcount; 1099 sf->ndranks++; 1100 } 1101 } 1102 PetscCall(PetscFree(groupranks)); 1103 PetscCall(PetscSortMPIIntWithIntArray(sf->ndranks, ranks, rcount)); 1104 if (rcount) PetscCall(PetscSortMPIIntWithIntArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks)); 1105 sf->roffset[0] = 0; 1106 for (i = 0; i < sf->nranks; i++) { 1107 PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i)); 1108 sf->roffset[i + 1] = sf->roffset[i] + rcount[i]; 1109 rcount[i] = 0; 1110 } 1111 for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) { 1112 /* short circuit */ 1113 if (orank != sf->remote[i].rank) { 1114 /* Search for index of iremote[i].rank in sf->ranks */ 1115 PetscCall(PetscFindMPIInt((PetscMPIInt)sf->remote[i].rank, sf->ndranks, sf->ranks, &irank)); 1116 if (irank < 0) { 1117 PetscCall(PetscFindMPIInt((PetscMPIInt)sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank)); 1118 if (irank >= 0) irank += sf->ndranks; 1119 } 1120 orank = (PetscMPIInt)sf->remote[i].rank; 1121 } 1122 PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %d in array", (PetscMPIInt)sf->remote[i].rank); 1123 sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i; 1124 sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index; 1125 rcount[irank]++; 1126 } 1127 PetscCall(PetscFree2(rcount, ranks)); 1128 PetscFunctionReturn(PETSC_SUCCESS); 1129 } 1130 1131 /*@C 1132 PetscSFGetGroups - gets incoming and outgoing process groups 1133 1134 Collective 1135 1136 Input Parameter: 1137 . sf - star forest 1138 1139 Output Parameters: 1140 + incoming - group of origin processes for incoming edges (leaves that reference my roots) 1141 - outgoing - group of destination processes for outgoing edges (roots that I reference) 1142 1143 Level: developer 1144 1145 .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()` 1146 @*/ 1147 PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing) 1148 { 1149 MPI_Group group = MPI_GROUP_NULL; 1150 1151 PetscFunctionBegin; 1152 PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups"); 1153 if (sf->ingroup == MPI_GROUP_NULL) { 1154 PetscInt i; 1155 const PetscInt *indegree; 1156 PetscMPIInt rank, *outranks, *inranks, indegree0; 1157 PetscSFNode *remote; 1158 PetscSF bgcount; 1159 1160 /* Compute the number of incoming ranks */ 1161 PetscCall(PetscMalloc1(sf->nranks, &remote)); 1162 for (i = 0; i < sf->nranks; i++) { 1163 remote[i].rank = sf->ranks[i]; 1164 remote[i].index = 0; 1165 } 1166 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount)); 1167 PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER)); 1168 PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree)); 1169 PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree)); 1170 /* Enumerate the incoming ranks */ 1171 PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks)); 1172 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 1173 for (i = 0; i < sf->nranks; i++) outranks[i] = rank; 1174 PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks)); 1175 PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks)); 1176 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 1177 PetscCall(PetscMPIIntCast(indegree[0], &indegree0)); 1178 PetscCallMPI(MPI_Group_incl(group, indegree0, inranks, &sf->ingroup)); 1179 PetscCallMPI(MPI_Group_free(&group)); 1180 PetscCall(PetscFree2(inranks, outranks)); 1181 PetscCall(PetscSFDestroy(&bgcount)); 1182 } 1183 *incoming = sf->ingroup; 1184 1185 if (sf->outgroup == MPI_GROUP_NULL) { 1186 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 1187 PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup)); 1188 PetscCallMPI(MPI_Group_free(&group)); 1189 } 1190 *outgoing = sf->outgroup; 1191 PetscFunctionReturn(PETSC_SUCCESS); 1192 } 1193 1194 /*@ 1195 PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks 1196 1197 Collective 1198 1199 Input Parameter: 1200 . sf - star forest 1201 1202 Output Parameter: 1203 . rsf - the star forest with a single root per process to perform communications 1204 1205 Level: developer 1206 1207 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()` 1208 @*/ 1209 PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf) 1210 { 1211 PetscFunctionBegin; 1212 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1213 PetscAssertPointer(rsf, 2); 1214 if (!sf->rankssf) { 1215 PetscSFNode *rremotes; 1216 const PetscMPIInt *ranks; 1217 PetscMPIInt nranks; 1218 1219 PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL)); 1220 PetscCall(PetscMalloc1(nranks, &rremotes)); 1221 for (PetscInt i = 0; i < nranks; i++) { 1222 rremotes[i].rank = ranks[i]; 1223 rremotes[i].index = 0; 1224 } 1225 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf)); 1226 PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER)); 1227 } 1228 *rsf = sf->rankssf; 1229 PetscFunctionReturn(PETSC_SUCCESS); 1230 } 1231 1232 /*@ 1233 PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters 1234 1235 Collective 1236 1237 Input Parameter: 1238 . sf - star forest that may contain roots with 0 or with more than 1 vertex 1239 1240 Output Parameter: 1241 . multi - star forest with split roots, such that each root has degree exactly 1 1242 1243 Level: developer 1244 1245 Note: 1246 In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi 1247 directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming 1248 edge, it is a candidate for future optimization that might involve its removal. 1249 1250 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()` 1251 @*/ 1252 PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi) 1253 { 1254 PetscFunctionBegin; 1255 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1256 PetscAssertPointer(multi, 2); 1257 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */ 1258 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi)); 1259 *multi = sf->multi; 1260 sf->multi->multi = sf->multi; 1261 PetscFunctionReturn(PETSC_SUCCESS); 1262 } 1263 if (!sf->multi) { 1264 const PetscInt *indegree; 1265 PetscInt i, *inoffset, *outones, *outoffset, maxlocal; 1266 PetscSFNode *remote; 1267 maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */ 1268 PetscCall(PetscSFComputeDegreeBegin(sf, &indegree)); 1269 PetscCall(PetscSFComputeDegreeEnd(sf, &indegree)); 1270 PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset)); 1271 inoffset[0] = 0; 1272 for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i]; 1273 for (i = 0; i < maxlocal; i++) outones[i] = 1; 1274 PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM)); 1275 PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM)); 1276 for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1277 if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */ 1278 for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp"); 1279 } 1280 PetscCall(PetscMalloc1(sf->nleaves, &remote)); 1281 for (i = 0; i < sf->nleaves; i++) { 1282 remote[i].rank = sf->remote[i].rank; 1283 remote[i].index = outoffset[sf->mine ? sf->mine[i] : i]; 1284 } 1285 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi)); 1286 sf->multi->multi = sf->multi; 1287 PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER)); 1288 if (sf->rankorder) { /* Sort the ranks */ 1289 PetscMPIInt rank; 1290 PetscInt *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree; 1291 PetscSFNode *newremote; 1292 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 1293 for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]); 1294 PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset)); 1295 for (i = 0; i < maxlocal; i++) outranks[i] = rank; 1296 PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE)); 1297 PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE)); 1298 /* Sort the incoming ranks at each vertex, build the inverse map */ 1299 for (i = 0; i < sf->nroots; i++) { 1300 PetscInt j; 1301 for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j; 1302 PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset)); 1303 for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1304 } 1305 PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE)); 1306 PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE)); 1307 PetscCall(PetscMalloc1(sf->nleaves, &newremote)); 1308 for (i = 0; i < sf->nleaves; i++) { 1309 newremote[i].rank = sf->remote[i].rank; 1310 newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i]; 1311 } 1312 PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER)); 1313 PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset)); 1314 } 1315 PetscCall(PetscFree3(inoffset, outones, outoffset)); 1316 } 1317 *multi = sf->multi; 1318 PetscFunctionReturn(PETSC_SUCCESS); 1319 } 1320 1321 /*@C 1322 PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices 1323 1324 Collective 1325 1326 Input Parameters: 1327 + sf - original star forest 1328 . nselected - number of selected roots on this process 1329 - selected - indices of the selected roots on this process 1330 1331 Output Parameter: 1332 . esf - new star forest 1333 1334 Level: advanced 1335 1336 Note: 1337 To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can 1338 be done by calling PetscSFGetGraph(). 1339 1340 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1341 @*/ 1342 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf) 1343 { 1344 PetscInt i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal; 1345 const PetscInt *ilocal; 1346 signed char *rootdata, *leafdata, *leafmem; 1347 const PetscSFNode *iremote; 1348 PetscSFNode *new_iremote; 1349 MPI_Comm comm; 1350 1351 PetscFunctionBegin; 1352 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1353 PetscSFCheckGraphSet(sf, 1); 1354 if (nselected) PetscAssertPointer(selected, 3); 1355 PetscAssertPointer(esf, 4); 1356 1357 PetscCall(PetscSFSetUp(sf)); 1358 PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0)); 1359 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1360 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 1361 1362 if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */ 1363 PetscBool dups; 1364 PetscCall(PetscCheckDupsInt(nselected, selected, &dups)); 1365 PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups"); 1366 for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots); 1367 } 1368 1369 if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf); 1370 else { 1371 /* A generic version of creating embedded sf */ 1372 PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf)); 1373 maxlocal = maxleaf - minleaf + 1; 1374 PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem)); 1375 leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf); 1376 /* Tag selected roots and bcast to leaves */ 1377 for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1; 1378 PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1379 PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1380 1381 /* Build esf with leaves that are still connected */ 1382 esf_nleaves = 0; 1383 for (i = 0; i < nleaves; i++) { 1384 j = ilocal ? ilocal[i] : i; 1385 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1386 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1387 */ 1388 esf_nleaves += (leafdata[j] ? 1 : 0); 1389 } 1390 PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal)); 1391 PetscCall(PetscMalloc1(esf_nleaves, &new_iremote)); 1392 for (i = n = 0; i < nleaves; i++) { 1393 j = ilocal ? ilocal[i] : i; 1394 if (leafdata[j]) { 1395 new_ilocal[n] = j; 1396 new_iremote[n].rank = iremote[i].rank; 1397 new_iremote[n].index = iremote[i].index; 1398 ++n; 1399 } 1400 } 1401 PetscCall(PetscSFCreate(comm, esf)); 1402 PetscCall(PetscSFSetFromOptions(*esf)); 1403 PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1404 PetscCall(PetscFree2(rootdata, leafmem)); 1405 } 1406 PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0)); 1407 PetscFunctionReturn(PETSC_SUCCESS); 1408 } 1409 1410 /*@C 1411 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices 1412 1413 Collective 1414 1415 Input Parameters: 1416 + sf - original star forest 1417 . nselected - number of selected leaves on this process 1418 - selected - indices of the selected leaves on this process 1419 1420 Output Parameter: 1421 . newsf - new star forest 1422 1423 Level: advanced 1424 1425 .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1426 @*/ 1427 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf) 1428 { 1429 const PetscSFNode *iremote; 1430 PetscSFNode *new_iremote; 1431 const PetscInt *ilocal; 1432 PetscInt i, nroots, *leaves, *new_ilocal; 1433 MPI_Comm comm; 1434 1435 PetscFunctionBegin; 1436 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1437 PetscSFCheckGraphSet(sf, 1); 1438 if (nselected) PetscAssertPointer(selected, 3); 1439 PetscAssertPointer(newsf, 4); 1440 1441 /* Uniq selected[] and put results in leaves[] */ 1442 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1443 PetscCall(PetscMalloc1(nselected, &leaves)); 1444 PetscCall(PetscArraycpy(leaves, selected, nselected)); 1445 PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves)); 1446 PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves); 1447 1448 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1449 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf); 1450 else { 1451 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote)); 1452 PetscCall(PetscMalloc1(nselected, &new_ilocal)); 1453 PetscCall(PetscMalloc1(nselected, &new_iremote)); 1454 for (i = 0; i < nselected; ++i) { 1455 const PetscInt l = leaves[i]; 1456 new_ilocal[i] = ilocal ? ilocal[l] : l; 1457 new_iremote[i].rank = iremote[l].rank; 1458 new_iremote[i].index = iremote[l].index; 1459 } 1460 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf)); 1461 PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1462 } 1463 PetscCall(PetscFree(leaves)); 1464 PetscFunctionReturn(PETSC_SUCCESS); 1465 } 1466 1467 /*@C 1468 PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()` 1469 1470 Collective 1471 1472 Input Parameters: 1473 + sf - star forest on which to communicate 1474 . unit - data type associated with each node 1475 . rootdata - buffer to broadcast 1476 - op - operation to use for reduction 1477 1478 Output Parameter: 1479 . leafdata - buffer to be reduced with values from each leaf's respective root 1480 1481 Level: intermediate 1482 1483 Note: 1484 When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1485 are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should 1486 use `PetscSFBcastWithMemTypeBegin()` instead. 1487 1488 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()` 1489 @*/ 1490 PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1491 { 1492 PetscMemType rootmtype, leafmtype; 1493 1494 PetscFunctionBegin; 1495 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1496 PetscCall(PetscSFSetUp(sf)); 1497 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1498 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1499 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1500 PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 1501 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1502 PetscFunctionReturn(PETSC_SUCCESS); 1503 } 1504 1505 /*@C 1506 PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call 1507 to `PetscSFBcastEnd()` 1508 1509 Collective 1510 1511 Input Parameters: 1512 + sf - star forest on which to communicate 1513 . unit - data type associated with each node 1514 . rootmtype - memory type of rootdata 1515 . rootdata - buffer to broadcast 1516 . leafmtype - memory type of leafdata 1517 - op - operation to use for reduction 1518 1519 Output Parameter: 1520 . leafdata - buffer to be reduced with values from each leaf's respective root 1521 1522 Level: intermediate 1523 1524 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()` 1525 @*/ 1526 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) 1527 { 1528 PetscFunctionBegin; 1529 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1530 PetscCall(PetscSFSetUp(sf)); 1531 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1532 PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 1533 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1534 PetscFunctionReturn(PETSC_SUCCESS); 1535 } 1536 1537 /*@C 1538 PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()` 1539 1540 Collective 1541 1542 Input Parameters: 1543 + sf - star forest 1544 . unit - data type 1545 . rootdata - buffer to broadcast 1546 - op - operation to use for reduction 1547 1548 Output Parameter: 1549 . leafdata - buffer to be reduced with values from each leaf's respective root 1550 1551 Level: intermediate 1552 1553 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()` 1554 @*/ 1555 PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1556 { 1557 PetscFunctionBegin; 1558 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1559 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1560 PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op); 1561 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1562 PetscFunctionReturn(PETSC_SUCCESS); 1563 } 1564 1565 /*@C 1566 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()` 1567 1568 Collective 1569 1570 Input Parameters: 1571 + sf - star forest 1572 . unit - data type 1573 . leafdata - values to reduce 1574 - op - reduction operation 1575 1576 Output Parameter: 1577 . rootdata - result of reduction of values from all leaves of each root 1578 1579 Level: intermediate 1580 1581 Note: 1582 When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1583 are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should 1584 use `PetscSFReduceWithMemTypeBegin()` instead. 1585 1586 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()` 1587 @*/ 1588 PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1589 { 1590 PetscMemType rootmtype, leafmtype; 1591 1592 PetscFunctionBegin; 1593 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1594 PetscCall(PetscSFSetUp(sf)); 1595 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1596 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1597 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1598 PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1599 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1600 PetscFunctionReturn(PETSC_SUCCESS); 1601 } 1602 1603 /*@C 1604 PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()` 1605 1606 Collective 1607 1608 Input Parameters: 1609 + sf - star forest 1610 . unit - data type 1611 . leafmtype - memory type of leafdata 1612 . leafdata - values to reduce 1613 . rootmtype - memory type of rootdata 1614 - op - reduction operation 1615 1616 Output Parameter: 1617 . rootdata - result of reduction of values from all leaves of each root 1618 1619 Level: intermediate 1620 1621 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()` 1622 @*/ 1623 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) 1624 { 1625 PetscFunctionBegin; 1626 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1627 PetscCall(PetscSFSetUp(sf)); 1628 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1629 PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1630 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1631 PetscFunctionReturn(PETSC_SUCCESS); 1632 } 1633 1634 /*@C 1635 PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()` 1636 1637 Collective 1638 1639 Input Parameters: 1640 + sf - star forest 1641 . unit - data type 1642 . leafdata - values to reduce 1643 - op - reduction operation 1644 1645 Output Parameter: 1646 . rootdata - result of reduction of values from all leaves of each root 1647 1648 Level: intermediate 1649 1650 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()` 1651 @*/ 1652 PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1653 { 1654 PetscFunctionBegin; 1655 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1656 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1657 PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op); 1658 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1659 PetscFunctionReturn(PETSC_SUCCESS); 1660 } 1661 1662 /*@C 1663 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, 1664 to be completed with `PetscSFFetchAndOpEnd()` 1665 1666 Collective 1667 1668 Input Parameters: 1669 + sf - star forest 1670 . unit - data type 1671 . leafdata - leaf values to use in reduction 1672 - op - operation to use for reduction 1673 1674 Output Parameters: 1675 + rootdata - root values to be updated, input state is seen by first process to perform an update 1676 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1677 1678 Level: advanced 1679 1680 Note: 1681 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1682 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1683 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1684 integers. 1685 1686 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()` 1687 @*/ 1688 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1689 { 1690 PetscMemType rootmtype, leafmtype, leafupdatemtype; 1691 1692 PetscFunctionBegin; 1693 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1694 PetscCall(PetscSFSetUp(sf)); 1695 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1696 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1697 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1698 PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype)); 1699 PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1700 PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 1701 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1702 PetscFunctionReturn(PETSC_SUCCESS); 1703 } 1704 1705 /*@C 1706 PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by 1707 applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()` 1708 1709 Collective 1710 1711 Input Parameters: 1712 + sf - star forest 1713 . unit - data type 1714 . rootmtype - memory type of rootdata 1715 . leafmtype - memory type of leafdata 1716 . leafdata - leaf values to use in reduction 1717 . leafupdatemtype - memory type of leafupdate 1718 - op - operation to use for reduction 1719 1720 Output Parameters: 1721 + rootdata - root values to be updated, input state is seen by first process to perform an update 1722 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1723 1724 Level: advanced 1725 1726 Note: 1727 See `PetscSFFetchAndOpBegin()` for more details. 1728 1729 .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()` 1730 @*/ 1731 PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op) 1732 { 1733 PetscFunctionBegin; 1734 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1735 PetscCall(PetscSFSetUp(sf)); 1736 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1737 PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1738 PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 1739 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1740 PetscFunctionReturn(PETSC_SUCCESS); 1741 } 1742 1743 /*@C 1744 PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()` 1745 to fetch values from roots and update atomically by applying operation using my leaf value 1746 1747 Collective 1748 1749 Input Parameters: 1750 + sf - star forest 1751 . unit - data type 1752 . leafdata - leaf values to use in reduction 1753 - op - operation to use for reduction 1754 1755 Output Parameters: 1756 + rootdata - root values to be updated, input state is seen by first process to perform an update 1757 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1758 1759 Level: advanced 1760 1761 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()` 1762 @*/ 1763 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1764 { 1765 PetscFunctionBegin; 1766 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1767 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0)); 1768 PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op); 1769 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0)); 1770 PetscFunctionReturn(PETSC_SUCCESS); 1771 } 1772 1773 /*@C 1774 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()` 1775 1776 Collective 1777 1778 Input Parameter: 1779 . sf - star forest 1780 1781 Output Parameter: 1782 . degree - degree of each root vertex 1783 1784 Level: advanced 1785 1786 Note: 1787 The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it. 1788 1789 .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()` 1790 @*/ 1791 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt *degree[]) 1792 { 1793 PetscFunctionBegin; 1794 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1795 PetscSFCheckGraphSet(sf, 1); 1796 PetscAssertPointer(degree, 2); 1797 if (!sf->degreeknown) { 1798 PetscInt i, nroots = sf->nroots, maxlocal; 1799 PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1800 maxlocal = sf->maxleaf - sf->minleaf + 1; 1801 PetscCall(PetscMalloc1(nroots, &sf->degree)); 1802 PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1803 for (i = 0; i < nroots; i++) sf->degree[i] = 0; 1804 for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1; 1805 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 1806 } 1807 *degree = NULL; 1808 PetscFunctionReturn(PETSC_SUCCESS); 1809 } 1810 1811 /*@C 1812 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()` 1813 1814 Collective 1815 1816 Input Parameter: 1817 . sf - star forest 1818 1819 Output Parameter: 1820 . degree - degree of each root vertex 1821 1822 Level: developer 1823 1824 Note: 1825 The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it. 1826 1827 .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()` 1828 @*/ 1829 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree) 1830 { 1831 PetscFunctionBegin; 1832 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1833 PetscSFCheckGraphSet(sf, 1); 1834 PetscAssertPointer(degree, 2); 1835 if (!sf->degreeknown) { 1836 PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 1837 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 1838 PetscCall(PetscFree(sf->degreetmp)); 1839 sf->degreeknown = PETSC_TRUE; 1840 } 1841 *degree = sf->degree; 1842 PetscFunctionReturn(PETSC_SUCCESS); 1843 } 1844 1845 /*@C 1846 PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`). 1847 Each multi-root is assigned index of the corresponding original root. 1848 1849 Collective 1850 1851 Input Parameters: 1852 + sf - star forest 1853 - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()` 1854 1855 Output Parameters: 1856 + nMultiRoots - (optional) number of multi-roots (roots of multi-`PetscSF`) 1857 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots` 1858 1859 Level: developer 1860 1861 Note: 1862 The returned array `multiRootsOrigNumbering` is newly allocated and should be destroyed with `PetscFree()` when no longer needed. 1863 1864 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()` 1865 @*/ 1866 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1867 { 1868 PetscSF msf; 1869 PetscInt k = 0, nroots, nmroots; 1870 1871 PetscFunctionBegin; 1872 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1873 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL)); 1874 if (nroots) PetscAssertPointer(degree, 2); 1875 if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3); 1876 PetscAssertPointer(multiRootsOrigNumbering, 4); 1877 PetscCall(PetscSFGetMultiSF(sf, &msf)); 1878 PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL)); 1879 PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering)); 1880 for (PetscInt i = 0; i < nroots; i++) { 1881 if (!degree[i]) continue; 1882 for (PetscInt j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i; 1883 } 1884 PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail"); 1885 if (nMultiRoots) *nMultiRoots = nmroots; 1886 PetscFunctionReturn(PETSC_SUCCESS); 1887 } 1888 1889 /*@C 1890 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()` 1891 1892 Collective 1893 1894 Input Parameters: 1895 + sf - star forest 1896 . unit - data type 1897 - leafdata - leaf data to gather to roots 1898 1899 Output Parameter: 1900 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1901 1902 Level: intermediate 1903 1904 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()` 1905 @*/ 1906 PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) 1907 { 1908 PetscSF multi = NULL; 1909 1910 PetscFunctionBegin; 1911 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1912 PetscCall(PetscSFSetUp(sf)); 1913 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1914 PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 1915 PetscFunctionReturn(PETSC_SUCCESS); 1916 } 1917 1918 /*@C 1919 PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()` 1920 1921 Collective 1922 1923 Input Parameters: 1924 + sf - star forest 1925 . unit - data type 1926 - leafdata - leaf data to gather to roots 1927 1928 Output Parameter: 1929 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1930 1931 Level: intermediate 1932 1933 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()` 1934 @*/ 1935 PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) 1936 { 1937 PetscSF multi = NULL; 1938 1939 PetscFunctionBegin; 1940 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1941 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1942 PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 1943 PetscFunctionReturn(PETSC_SUCCESS); 1944 } 1945 1946 /*@C 1947 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()` 1948 1949 Collective 1950 1951 Input Parameters: 1952 + sf - star forest 1953 . unit - data type 1954 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1955 1956 Output Parameter: 1957 . leafdata - leaf data to be update with personal data from each respective root 1958 1959 Level: intermediate 1960 1961 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()` 1962 @*/ 1963 PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) 1964 { 1965 PetscSF multi = NULL; 1966 1967 PetscFunctionBegin; 1968 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1969 PetscCall(PetscSFSetUp(sf)); 1970 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1971 PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE)); 1972 PetscFunctionReturn(PETSC_SUCCESS); 1973 } 1974 1975 /*@C 1976 PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()` 1977 1978 Collective 1979 1980 Input Parameters: 1981 + sf - star forest 1982 . unit - data type 1983 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1984 1985 Output Parameter: 1986 . leafdata - leaf data to be update with personal data from each respective root 1987 1988 Level: intermediate 1989 1990 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()` 1991 @*/ 1992 PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) 1993 { 1994 PetscSF multi = NULL; 1995 1996 PetscFunctionBegin; 1997 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1998 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1999 PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE)); 2000 PetscFunctionReturn(PETSC_SUCCESS); 2001 } 2002 2003 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 2004 { 2005 PetscInt i, n, nleaves; 2006 const PetscInt *ilocal = NULL; 2007 PetscHSetI seen; 2008 2009 PetscFunctionBegin; 2010 if (PetscDefined(USE_DEBUG)) { 2011 PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL)); 2012 PetscCall(PetscHSetICreate(&seen)); 2013 for (i = 0; i < nleaves; i++) { 2014 const PetscInt leaf = ilocal ? ilocal[i] : i; 2015 PetscCall(PetscHSetIAdd(seen, leaf)); 2016 } 2017 PetscCall(PetscHSetIGetSize(seen, &n)); 2018 PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique"); 2019 PetscCall(PetscHSetIDestroy(&seen)); 2020 } 2021 PetscFunctionReturn(PETSC_SUCCESS); 2022 } 2023 2024 /*@ 2025 PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view 2026 2027 Input Parameters: 2028 + sfA - The first `PetscSF` 2029 - sfB - The second `PetscSF` 2030 2031 Output Parameter: 2032 . sfBA - The composite `PetscSF` 2033 2034 Level: developer 2035 2036 Notes: 2037 Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star 2038 forests, i.e. the same leaf is not connected with different roots. 2039 2040 `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds 2041 a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected 2042 nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a 2043 `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfB`, on connected nodes. 2044 2045 .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()` 2046 @*/ 2047 PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA) 2048 { 2049 const PetscSFNode *remotePointsA, *remotePointsB; 2050 PetscSFNode *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB; 2051 const PetscInt *localPointsA, *localPointsB; 2052 PetscInt *localPointsBA; 2053 PetscInt i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA; 2054 PetscBool denseB; 2055 2056 PetscFunctionBegin; 2057 PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1); 2058 PetscSFCheckGraphSet(sfA, 1); 2059 PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2); 2060 PetscSFCheckGraphSet(sfB, 2); 2061 PetscCheckSameComm(sfA, 1, sfB, 2); 2062 PetscAssertPointer(sfBA, 3); 2063 PetscCall(PetscSFCheckLeavesUnique_Private(sfA)); 2064 PetscCall(PetscSFCheckLeavesUnique_Private(sfB)); 2065 2066 PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA)); 2067 PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB)); 2068 /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size 2069 numRootsB; otherwise, garbage will be broadcasted. 2070 Example (comm size = 1): 2071 sfA: 0 <- (0, 0) 2072 sfB: 100 <- (0, 0) 2073 101 <- (0, 1) 2074 Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid tartget 2075 of sfB, it has to be recasted as [(0, 0), (-1, -1)] so that points 100 and 101 would 2076 receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on 2077 remotePointsA; if not recasted, point 101 would receive a garbage value. */ 2078 PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA)); 2079 for (i = 0; i < numRootsB; i++) { 2080 reorderedRemotePointsA[i].rank = -1; 2081 reorderedRemotePointsA[i].index = -1; 2082 } 2083 for (i = 0; i < numLeavesA; i++) { 2084 PetscInt localp = localPointsA ? localPointsA[i] : i; 2085 2086 if (localp >= numRootsB) continue; 2087 reorderedRemotePointsA[localp] = remotePointsA[i]; 2088 } 2089 remotePointsA = reorderedRemotePointsA; 2090 PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf)); 2091 PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB)); 2092 for (i = 0; i < maxleaf - minleaf + 1; i++) { 2093 leafdataB[i].rank = -1; 2094 leafdataB[i].index = -1; 2095 } 2096 PetscCall(PetscSFBcastBegin(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE)); 2097 PetscCall(PetscSFBcastEnd(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE)); 2098 PetscCall(PetscFree(reorderedRemotePointsA)); 2099 2100 denseB = (PetscBool)!localPointsB; 2101 for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) { 2102 if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE; 2103 else numLeavesBA++; 2104 } 2105 if (denseB) { 2106 localPointsBA = NULL; 2107 remotePointsBA = leafdataB; 2108 } else { 2109 PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA)); 2110 PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA)); 2111 for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) { 2112 const PetscInt l = localPointsB ? localPointsB[i] : i; 2113 2114 if (leafdataB[l - minleaf].rank == -1) continue; 2115 remotePointsBA[numLeavesBA] = leafdataB[l - minleaf]; 2116 localPointsBA[numLeavesBA] = l; 2117 numLeavesBA++; 2118 } 2119 PetscCall(PetscFree(leafdataB)); 2120 } 2121 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA)); 2122 PetscCall(PetscSFSetFromOptions(*sfBA)); 2123 PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER)); 2124 PetscFunctionReturn(PETSC_SUCCESS); 2125 } 2126 2127 /*@ 2128 PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one 2129 2130 Input Parameters: 2131 + sfA - The first `PetscSF` 2132 - sfB - The second `PetscSF` 2133 2134 Output Parameter: 2135 . sfBA - The composite `PetscSF`. 2136 2137 Level: developer 2138 2139 Notes: 2140 Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star 2141 forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the 2142 second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected. 2143 2144 `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds 2145 a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected 2146 roots are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` 2147 on `sfA`, then 2148 a `PetscSFReduceBegin()`/`PetscSFReduceEnd()` on `sfB`, on connected roots. 2149 2150 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()` 2151 @*/ 2152 PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA) 2153 { 2154 const PetscSFNode *remotePointsA, *remotePointsB; 2155 PetscSFNode *remotePointsBA; 2156 const PetscInt *localPointsA, *localPointsB; 2157 PetscSFNode *reorderedRemotePointsA = NULL; 2158 PetscInt i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA; 2159 MPI_Op op; 2160 #if defined(PETSC_USE_64BIT_INDICES) 2161 PetscBool iswin; 2162 #endif 2163 2164 PetscFunctionBegin; 2165 PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1); 2166 PetscSFCheckGraphSet(sfA, 1); 2167 PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2); 2168 PetscSFCheckGraphSet(sfB, 2); 2169 PetscCheckSameComm(sfA, 1, sfB, 2); 2170 PetscAssertPointer(sfBA, 3); 2171 PetscCall(PetscSFCheckLeavesUnique_Private(sfA)); 2172 PetscCall(PetscSFCheckLeavesUnique_Private(sfB)); 2173 2174 PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA)); 2175 PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB)); 2176 2177 /* TODO: Check roots of sfB have degree of 1 */ 2178 /* Once we implement it, we can replace the MPI_MAXLOC 2179 with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect. 2180 We use MPI_MAXLOC only to have a deterministic output from this routine if 2181 the root condition is not meet. 2182 */ 2183 op = MPI_MAXLOC; 2184 #if defined(PETSC_USE_64BIT_INDICES) 2185 /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */ 2186 PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin)); 2187 if (iswin) op = MPI_REPLACE; 2188 #endif 2189 2190 PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf)); 2191 PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA)); 2192 for (i = 0; i < maxleaf - minleaf + 1; i++) { 2193 reorderedRemotePointsA[i].rank = -1; 2194 reorderedRemotePointsA[i].index = -1; 2195 } 2196 if (localPointsA) { 2197 for (i = 0; i < numLeavesA; i++) { 2198 if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; 2199 reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i]; 2200 } 2201 } else { 2202 for (i = 0; i < numLeavesA; i++) { 2203 if (i > maxleaf || i < minleaf) continue; 2204 reorderedRemotePointsA[i - minleaf] = remotePointsA[i]; 2205 } 2206 } 2207 2208 PetscCall(PetscMalloc1(numRootsB, &localPointsBA)); 2209 PetscCall(PetscMalloc1(numRootsB, &remotePointsBA)); 2210 for (i = 0; i < numRootsB; i++) { 2211 remotePointsBA[i].rank = -1; 2212 remotePointsBA[i].index = -1; 2213 } 2214 2215 PetscCall(PetscSFReduceBegin(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op)); 2216 PetscCall(PetscSFReduceEnd(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op)); 2217 PetscCall(PetscFree(reorderedRemotePointsA)); 2218 for (i = 0, numLeavesBA = 0; i < numRootsB; i++) { 2219 if (remotePointsBA[i].rank == -1) continue; 2220 remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank; 2221 remotePointsBA[numLeavesBA].index = remotePointsBA[i].index; 2222 localPointsBA[numLeavesBA] = i; 2223 numLeavesBA++; 2224 } 2225 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA)); 2226 PetscCall(PetscSFSetFromOptions(*sfBA)); 2227 PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER)); 2228 PetscFunctionReturn(PETSC_SUCCESS); 2229 } 2230 2231 /* 2232 PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF` 2233 2234 Input Parameter: 2235 . sf - The global `PetscSF` 2236 2237 Output Parameter: 2238 . out - The local `PetscSF` 2239 2240 .seealso: `PetscSF`, `PetscSFCreate()` 2241 */ 2242 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out) 2243 { 2244 MPI_Comm comm; 2245 PetscMPIInt myrank; 2246 const PetscInt *ilocal; 2247 const PetscSFNode *iremote; 2248 PetscInt i, j, nroots, nleaves, lnleaves, *lilocal; 2249 PetscSFNode *liremote; 2250 PetscSF lsf; 2251 2252 PetscFunctionBegin; 2253 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2254 if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out); 2255 else { 2256 /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */ 2257 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 2258 PetscCallMPI(MPI_Comm_rank(comm, &myrank)); 2259 2260 /* Find out local edges and build a local SF */ 2261 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 2262 for (i = lnleaves = 0; i < nleaves; i++) { 2263 if (iremote[i].rank == (PetscInt)myrank) lnleaves++; 2264 } 2265 PetscCall(PetscMalloc1(lnleaves, &lilocal)); 2266 PetscCall(PetscMalloc1(lnleaves, &liremote)); 2267 2268 for (i = j = 0; i < nleaves; i++) { 2269 if (iremote[i].rank == (PetscInt)myrank) { 2270 lilocal[j] = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */ 2271 liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */ 2272 liremote[j].index = iremote[i].index; 2273 j++; 2274 } 2275 } 2276 PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf)); 2277 PetscCall(PetscSFSetFromOptions(lsf)); 2278 PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER)); 2279 PetscCall(PetscSFSetUp(lsf)); 2280 *out = lsf; 2281 } 2282 PetscFunctionReturn(PETSC_SUCCESS); 2283 } 2284 2285 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */ 2286 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata) 2287 { 2288 PetscMemType rootmtype, leafmtype; 2289 2290 PetscFunctionBegin; 2291 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2292 PetscCall(PetscSFSetUp(sf)); 2293 PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 2294 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 2295 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 2296 PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata); 2297 PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 2298 PetscFunctionReturn(PETSC_SUCCESS); 2299 } 2300 2301 /*@ 2302 PetscSFConcatenate - concatenate multiple `PetscSF` into one 2303 2304 Input Parameters: 2305 + comm - the communicator 2306 . nsfs - the number of input `PetscSF` 2307 . sfs - the array of input `PetscSF` 2308 . rootMode - the root mode specifying how roots are handled 2309 - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage 2310 2311 Output Parameter: 2312 . newsf - The resulting `PetscSF` 2313 2314 Level: advanced 2315 2316 Notes: 2317 The communicator of all `PetscSF`s in `sfs` must be comm. 2318 2319 Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order. 2320 2321 The offsets in `leafOffsets` are added to the original leaf indices. 2322 2323 If all input SFs use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well. 2324 In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`. 2325 2326 If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s. 2327 In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs). 2328 2329 All root modes retain the essential connectivity condition. 2330 If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`. 2331 Parameter `rootMode` controls how the input root spaces are combined. 2332 For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode) 2333 and is also the same in the output `PetscSF`. 2334 For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined. 2335 `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally; 2336 roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input `PetscSF` and original local index, and renumbered contiguously. 2337 `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally; 2338 roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously; 2339 the original root ranks are ignored. 2340 For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, 2341 the output `PetscSF`'s root layout is such that the local number of roots is a sum of the input `PetscSF`'s local numbers of roots on each rank 2342 to keep the load balancing. 2343 However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different ranks. 2344 2345 Example: 2346 We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running 2347 .vb 2348 make -C $PETSC_DIR/src/vec/is/sf/tests ex18 2349 for m in {local,global,shared}; do 2350 mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view 2351 done 2352 .ve 2353 we generate two identical `PetscSF`s sf_0 and sf_1, 2354 .vb 2355 PetscSF Object: sf_0 2 MPI processes 2356 type: basic 2357 rank #leaves #roots 2358 [ 0] 4 2 2359 [ 1] 4 2 2360 leaves roots roots in global numbering 2361 ( 0, 0) <- ( 0, 0) = 0 2362 ( 0, 1) <- ( 0, 1) = 1 2363 ( 0, 2) <- ( 1, 0) = 2 2364 ( 0, 3) <- ( 1, 1) = 3 2365 ( 1, 0) <- ( 0, 0) = 0 2366 ( 1, 1) <- ( 0, 1) = 1 2367 ( 1, 2) <- ( 1, 0) = 2 2368 ( 1, 3) <- ( 1, 1) = 3 2369 .ve 2370 and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\: 2371 .vb 2372 rootMode = local: 2373 PetscSF Object: result_sf 2 MPI processes 2374 type: basic 2375 rank #leaves #roots 2376 [ 0] 8 4 2377 [ 1] 8 4 2378 leaves roots roots in global numbering 2379 ( 0, 0) <- ( 0, 0) = 0 2380 ( 0, 1) <- ( 0, 1) = 1 2381 ( 0, 2) <- ( 1, 0) = 4 2382 ( 0, 3) <- ( 1, 1) = 5 2383 ( 0, 4) <- ( 0, 2) = 2 2384 ( 0, 5) <- ( 0, 3) = 3 2385 ( 0, 6) <- ( 1, 2) = 6 2386 ( 0, 7) <- ( 1, 3) = 7 2387 ( 1, 0) <- ( 0, 0) = 0 2388 ( 1, 1) <- ( 0, 1) = 1 2389 ( 1, 2) <- ( 1, 0) = 4 2390 ( 1, 3) <- ( 1, 1) = 5 2391 ( 1, 4) <- ( 0, 2) = 2 2392 ( 1, 5) <- ( 0, 3) = 3 2393 ( 1, 6) <- ( 1, 2) = 6 2394 ( 1, 7) <- ( 1, 3) = 7 2395 2396 rootMode = global: 2397 PetscSF Object: result_sf 2 MPI processes 2398 type: basic 2399 rank #leaves #roots 2400 [ 0] 8 4 2401 [ 1] 8 4 2402 leaves roots roots in global numbering 2403 ( 0, 0) <- ( 0, 0) = 0 2404 ( 0, 1) <- ( 0, 1) = 1 2405 ( 0, 2) <- ( 0, 2) = 2 2406 ( 0, 3) <- ( 0, 3) = 3 2407 ( 0, 4) <- ( 1, 0) = 4 2408 ( 0, 5) <- ( 1, 1) = 5 2409 ( 0, 6) <- ( 1, 2) = 6 2410 ( 0, 7) <- ( 1, 3) = 7 2411 ( 1, 0) <- ( 0, 0) = 0 2412 ( 1, 1) <- ( 0, 1) = 1 2413 ( 1, 2) <- ( 0, 2) = 2 2414 ( 1, 3) <- ( 0, 3) = 3 2415 ( 1, 4) <- ( 1, 0) = 4 2416 ( 1, 5) <- ( 1, 1) = 5 2417 ( 1, 6) <- ( 1, 2) = 6 2418 ( 1, 7) <- ( 1, 3) = 7 2419 2420 rootMode = shared: 2421 PetscSF Object: result_sf 2 MPI processes 2422 type: basic 2423 rank #leaves #roots 2424 [ 0] 8 2 2425 [ 1] 8 2 2426 leaves roots roots in global numbering 2427 ( 0, 0) <- ( 0, 0) = 0 2428 ( 0, 1) <- ( 0, 1) = 1 2429 ( 0, 2) <- ( 1, 0) = 2 2430 ( 0, 3) <- ( 1, 1) = 3 2431 ( 0, 4) <- ( 0, 0) = 0 2432 ( 0, 5) <- ( 0, 1) = 1 2433 ( 0, 6) <- ( 1, 0) = 2 2434 ( 0, 7) <- ( 1, 1) = 3 2435 ( 1, 0) <- ( 0, 0) = 0 2436 ( 1, 1) <- ( 0, 1) = 1 2437 ( 1, 2) <- ( 1, 0) = 2 2438 ( 1, 3) <- ( 1, 1) = 3 2439 ( 1, 4) <- ( 0, 0) = 0 2440 ( 1, 5) <- ( 0, 1) = 1 2441 ( 1, 6) <- ( 1, 0) = 2 2442 ( 1, 7) <- ( 1, 1) = 3 2443 .ve 2444 2445 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode` 2446 @*/ 2447 PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf) 2448 { 2449 PetscInt i, s, nLeaves, nRoots; 2450 PetscInt *leafArrayOffsets; 2451 PetscInt *ilocal_new; 2452 PetscSFNode *iremote_new; 2453 PetscBool all_ilocal_null = PETSC_FALSE; 2454 PetscLayout glayout = NULL; 2455 PetscInt *gremote = NULL; 2456 PetscMPIInt rank, size; 2457 2458 PetscFunctionBegin; 2459 if (PetscDefined(USE_DEBUG)) { 2460 PetscSF dummy; /* just to have a PetscObject on comm for input validation */ 2461 2462 PetscCall(PetscSFCreate(comm, &dummy)); 2463 PetscValidLogicalCollectiveInt(dummy, nsfs, 2); 2464 PetscAssertPointer(sfs, 3); 2465 for (i = 0; i < nsfs; i++) { 2466 PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3); 2467 PetscCheckSameComm(dummy, 1, sfs[i], 3); 2468 } 2469 PetscValidLogicalCollectiveEnum(dummy, rootMode, 4); 2470 if (leafOffsets) PetscAssertPointer(leafOffsets, 5); 2471 PetscAssertPointer(newsf, 6); 2472 PetscCall(PetscSFDestroy(&dummy)); 2473 } 2474 if (!nsfs) { 2475 PetscCall(PetscSFCreate(comm, newsf)); 2476 PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 2477 PetscFunctionReturn(PETSC_SUCCESS); 2478 } 2479 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2480 PetscCallMPI(MPI_Comm_size(comm, &size)); 2481 2482 /* Calculate leaf array offsets */ 2483 PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets)); 2484 leafArrayOffsets[0] = 0; 2485 for (s = 0; s < nsfs; s++) { 2486 PetscInt nl; 2487 2488 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL)); 2489 leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl; 2490 } 2491 nLeaves = leafArrayOffsets[nsfs]; 2492 2493 /* Calculate number of roots */ 2494 switch (rootMode) { 2495 case PETSCSF_CONCATENATE_ROOTMODE_SHARED: { 2496 PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL)); 2497 if (PetscDefined(USE_DEBUG)) { 2498 for (s = 1; s < nsfs; s++) { 2499 PetscInt nr; 2500 2501 PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL)); 2502 PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots); 2503 } 2504 } 2505 } break; 2506 case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: { 2507 /* Calculate also global layout in this case */ 2508 PetscInt *nls; 2509 PetscLayout *lts; 2510 PetscInt **inds; 2511 PetscInt j; 2512 PetscInt rootOffset = 0; 2513 2514 PetscCall(PetscCalloc3(nsfs, <s, nsfs, &nls, nsfs, &inds)); 2515 PetscCall(PetscLayoutCreate(comm, &glayout)); 2516 glayout->bs = 1; 2517 glayout->n = 0; 2518 glayout->N = 0; 2519 for (s = 0; s < nsfs; s++) { 2520 PetscCall(PetscSFGetGraphLayout(sfs[s], <s[s], &nls[s], NULL, &inds[s])); 2521 glayout->n += lts[s]->n; 2522 glayout->N += lts[s]->N; 2523 } 2524 PetscCall(PetscLayoutSetUp(glayout)); 2525 PetscCall(PetscMalloc1(nLeaves, &gremote)); 2526 for (s = 0, j = 0; s < nsfs; s++) { 2527 for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset; 2528 rootOffset += lts[s]->N; 2529 PetscCall(PetscLayoutDestroy(<s[s])); 2530 PetscCall(PetscFree(inds[s])); 2531 } 2532 PetscCall(PetscFree3(lts, nls, inds)); 2533 nRoots = glayout->N; 2534 } break; 2535 case PETSCSF_CONCATENATE_ROOTMODE_LOCAL: 2536 /* nRoots calculated later in this case */ 2537 break; 2538 default: 2539 SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode); 2540 } 2541 2542 if (!leafOffsets) { 2543 all_ilocal_null = PETSC_TRUE; 2544 for (s = 0; s < nsfs; s++) { 2545 const PetscInt *ilocal; 2546 2547 PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL)); 2548 if (ilocal) { 2549 all_ilocal_null = PETSC_FALSE; 2550 break; 2551 } 2552 } 2553 PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL"); 2554 } 2555 2556 /* Renumber and concatenate local leaves */ 2557 ilocal_new = NULL; 2558 if (!all_ilocal_null) { 2559 PetscCall(PetscMalloc1(nLeaves, &ilocal_new)); 2560 for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1; 2561 for (s = 0; s < nsfs; s++) { 2562 const PetscInt *ilocal; 2563 PetscInt *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]); 2564 PetscInt i, nleaves_l; 2565 2566 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL)); 2567 for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s]; 2568 } 2569 } 2570 2571 /* Renumber and concatenate remote roots */ 2572 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) { 2573 PetscInt rootOffset = 0; 2574 2575 PetscCall(PetscMalloc1(nLeaves, &iremote_new)); 2576 for (i = 0; i < nLeaves; i++) { 2577 iremote_new[i].rank = -1; 2578 iremote_new[i].index = -1; 2579 } 2580 for (s = 0; s < nsfs; s++) { 2581 PetscInt i, nl, nr; 2582 PetscSF tmp_sf; 2583 const PetscSFNode *iremote; 2584 PetscSFNode *tmp_rootdata; 2585 PetscSFNode *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]); 2586 2587 PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote)); 2588 PetscCall(PetscSFCreate(comm, &tmp_sf)); 2589 /* create helper SF with contiguous leaves */ 2590 PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 2591 PetscCall(PetscSFSetUp(tmp_sf)); 2592 PetscCall(PetscMalloc1(nr, &tmp_rootdata)); 2593 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) { 2594 for (i = 0; i < nr; i++) { 2595 tmp_rootdata[i].index = i + rootOffset; 2596 tmp_rootdata[i].rank = rank; 2597 } 2598 rootOffset += nr; 2599 } else { 2600 for (i = 0; i < nr; i++) { 2601 tmp_rootdata[i].index = i; 2602 tmp_rootdata[i].rank = rank; 2603 } 2604 } 2605 PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2606 PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2607 PetscCall(PetscSFDestroy(&tmp_sf)); 2608 PetscCall(PetscFree(tmp_rootdata)); 2609 } 2610 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above 2611 2612 /* Build the new SF */ 2613 PetscCall(PetscSFCreate(comm, newsf)); 2614 PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER)); 2615 } else { 2616 /* Build the new SF */ 2617 PetscCall(PetscSFCreate(comm, newsf)); 2618 PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote)); 2619 } 2620 PetscCall(PetscSFSetUp(*newsf)); 2621 PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view")); 2622 PetscCall(PetscLayoutDestroy(&glayout)); 2623 PetscCall(PetscFree(gremote)); 2624 PetscCall(PetscFree(leafArrayOffsets)); 2625 PetscFunctionReturn(PETSC_SUCCESS); 2626 } 2627 2628 /*@ 2629 PetscSFRegisterPersistent - Register root and leaf data as memory regions that will be used for repeated PetscSF communications. 2630 2631 Collective 2632 2633 Input Parameters: 2634 + sf - star forest 2635 . unit - the data type contained within the root and leaf data 2636 . rootdata - root data that will be used for muliple PetscSF communications 2637 - leafdata - leaf data that will be used for muliple PetscSF communications 2638 2639 Level: advanced 2640 2641 Notes: 2642 Implementations of `PetscSF` can make optimizations 2643 for repeated communication using the same memory regions, but these optimizations 2644 can be unsound if `rootdata` or `leafdata` is deallocated and the `PetscSF` is not informed. 2645 The intended pattern is 2646 2647 .vb 2648 PetscMalloc2(nroots, &rootdata, nleaves, &leafdata); 2649 2650 PetscSFRegisterPersistent(sf, unit, rootdata, leafdata); 2651 // repeated use of rootdata and leafdata will now be optimized 2652 2653 PetscSFBcastBegin(sf, unit, rootdata, leafdata, MPI_REPLACE); 2654 PetscSFBcastEnd(sf, unit, rootdata, leafdata, MPI_REPLACE); 2655 // ... 2656 PetscSFReduceBegin(sf, unit, leafdata, rootdata, MPI_SUM); 2657 PetscSFReduceEnd(sf, unit, leafdata, rootdata, MPI_SUM); 2658 // ... (other communications) 2659 2660 // rootdata and leafdata must be deregistered before freeing 2661 // skipping this can lead to undefined behavior including 2662 // deadlocks 2663 PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata); 2664 2665 // it is now safe to free rootdata and leafdata 2666 PetscFree2(rootdata, leafdata); 2667 .ve 2668 2669 If you do not register `rootdata` and `leafdata` it will not cause an error, 2670 but optimizations that reduce the setup time for each communication cannot be 2671 made. Currently, the only implementation of `PetscSF` that benefits from 2672 `PetscSFRegisterPersistent()` is `PETSCSFWINDOW`. For the default 2673 `PETSCSFBASIC` there is no benefit to using `PetscSFRegisterPersistent()`. 2674 2675 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFDeregisterPersistent()` 2676 @*/ 2677 PetscErrorCode PetscSFRegisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata) 2678 { 2679 PetscFunctionBegin; 2680 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2681 PetscTryMethod(sf, "PetscSFRegisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata)); 2682 PetscFunctionReturn(PETSC_SUCCESS); 2683 } 2684 2685 /*@ 2686 PetscSFDeregisterPersistent - Signal that repeated usage of root and leaf data for PetscSF communication has concluded. 2687 2688 Collective 2689 2690 Input Parameters: 2691 + sf - star forest 2692 . unit - the data type contained within the root and leaf data 2693 . rootdata - root data that was previously registered with `PetscSFRegisterPersistent()` 2694 - leafdata - leaf data that was previously registered with `PetscSFRegisterPersistent()` 2695 2696 Level: advanced 2697 2698 Note: 2699 See `PetscSFRegisterPersistent()` for when/how to use this function. 2700 2701 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFRegisterPersistent()` 2702 @*/ 2703 PetscErrorCode PetscSFDeregisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata) 2704 { 2705 PetscFunctionBegin; 2706 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2707 PetscTryMethod(sf, "PetscSFDeregisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata)); 2708 PetscFunctionReturn(PETSC_SUCCESS); 2709 } 2710 2711 PETSC_INTERN PetscErrorCode PetscSFGetDatatypeSize_Internal(MPI_Comm comm, MPI_Datatype unit, MPI_Aint *size) 2712 { 2713 MPI_Aint lb, lb_true, bytes, bytes_true; 2714 2715 PetscFunctionBegin; 2716 PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes)); 2717 PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true)); 2718 PetscCheck(lb == 0 && lb_true == 0, comm, PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature"); 2719 *size = bytes; 2720 PetscFunctionReturn(PETSC_SUCCESS); 2721 } 2722