#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/hashseti.h>
#include <petsc/private/viewerimpl.h>
#include <petsc/private/hashmapi.h>

#if defined(PETSC_HAVE_CUDA)
#include <petscdevice_cuda.h>
#endif

#if defined(PETSC_HAVE_HIP)
#include <hip/hip_runtime.h>
#endif

#if defined(PETSC_CLANG_STATIC_ANALYZER)
extern void PetscSFCheckGraphSet(PetscSF, int);
#else
#if defined(PETSC_USE_DEBUG)
/* Debug builds: raise an error if the graph of sf has not been set before use */
#define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME)
#else
/* Optimized builds: the check compiles away */
#define PetscSFCheckGraphSet(sf, arg) \
  do { \
  } while (0)
#endif
#endif

const char *const PetscSFDuplicateOptions[]     = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL};

/*@
  PetscSFCreate - create a star forest communication context

  Collective

  Input Parameter:
. comm - communicator on which the star forest will operate

  Output Parameter:
. sf - new star forest context

  Options Database Key:
+ -sf_type basic                 - Use MPI persistent Isend/Irecv for communication (Default)
. -sf_type window                - Use MPI-3 one-sided window for communication
. -sf_type neighbor              - Use MPI-3 neighborhood collectives for communication
- -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor)

  Level: intermediate

  Note:
  When one knows the communication graph is one of the predefined graphs, such as `MPI_Alltoall()`, `MPI_Allgatherv()`,
  `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
  `SF`s are optimized and they have better performance than the general `SF`s.

.seealso: `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
{
  PetscSF b;

  PetscFunctionBegin;
  PetscAssertPointer(sf, 2);
  PetscCall(PetscSFInitializePackage());

  PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));
  /* Sentinel values mark the graph as not-yet-set (see PetscSFSetGraph()) */
  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_INT_MAX;
  b->maxleaf   = PETSC_INT_MIN;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;
#if defined(PETSC_HAVE_DEVICE)
  b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
  b->use_stream_aware_mpi = PETSC_FALSE;
  b->unknown_input_stream = PETSC_FALSE;
#if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda */
  b->backend = PETSCSF_BACKEND_KOKKOS;
#elif defined(PETSC_HAVE_CUDA)
  b->backend = PETSCSF_BACKEND_CUDA;
#elif defined(PETSC_HAVE_HIP)
  b->backend = PETSCSF_BACKEND_HIP;
#endif

#if defined(PETSC_HAVE_NVSHMEM)
  b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
  b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
#endif
#endif
  b->vscat.from_n = -1;
  b->vscat.to_n   = -1;
  b->vscat.unit   = MPIU_SCALAR;
  *sf             = b;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

  Collective

  Input Parameter:
.
sf - star forest

  Level: advanced

.seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscTryTypeMethod(sf, Reset); /* implementation-specific teardown first */
  PetscCall(PetscSFDestroy(&sf->rankssf));

  /* Restore graph-describing fields to their "unset" sentinels (same as PetscSFCreate()) */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_INT_MAX;
  sf->maxleaf  = PETSC_INT_MIN;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  PetscCall(PetscFree(sf->mine_alloc));
  PetscCall(PetscFree(sf->remote_alloc));
  sf->nranks = -1;
  PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
  sf->degreeknown = PETSC_FALSE;
  PetscCall(PetscFree(sf->degree));
  if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
  if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));

  /* Break the back-pointer before destroying the multi-SF to avoid recursive destruction */
  if (sf->multi) sf->multi->multi = NULL;
  PetscCall(PetscSFDestroy(&sf->multi));

  PetscCall(PetscLayoutDestroy(&sf->map));

#if defined(PETSC_HAVE_DEVICE)
  /* Free the device-side mirrors of rmine for both (root,leaf) directions */
  for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
#endif

  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFSetType - Set the `PetscSF` communication implementation

  Collective

  Input Parameters:
+ sf   - the `PetscSF` context
- type - a known method
.vb
    PETSCSFWINDOW - MPI-2/3 one-sided
    PETSCSFBASIC  - basic implementation using MPI-1 two-sided
.ve

  Options Database Key:
.
-sf_type <type> - Sets the method; for example `basic` or `window`, use -help for a list of available methods

  Level: intermediate

  Notes:
  See `PetscSFType` for possible values

.seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
{
  PetscBool match;
  PetscErrorCode (*r)(PetscSF);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscAssertPointer(type, 2);

  /* No-op if the requested type is already set */
  PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
  if (match) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
  PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
  /* Destroy the previous PetscSF implementation context */
  PetscTryTypeMethod(sf, Destroy);
  PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
  PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
  PetscCall((*r)(sf)); /* invoke the type's constructor to fill sf->ops */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFGetType - Get the `PetscSF` communication implementation

  Not Collective

  Input Parameter:
. sf - the `PetscSF` context

  Output Parameter:
. type - the `PetscSF` type name

  Level: intermediate

.seealso: `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscAssertPointer(type, 2);
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFDestroy - destroy a star forest

  Collective

  Input Parameter:
.
sf - address of star forest

  Level: intermediate

.seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(PETSC_SUCCESS);
  PetscValidHeaderSpecific(*sf, PETSCSF_CLASSID, 1);
  if (--((PetscObject)*sf)->refct > 0) { /* still referenced elsewhere; just drop this handle */
    *sf = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscSFReset(*sf));
  PetscTryTypeMethod(*sf, Destroy);
  PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
  /* bs > 1 implies vscat.unit was MPI_Type_dup'ed and must be freed here */
  if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
#if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
  if ((*sf)->use_stream_aware_mpi) {
    PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream));
    PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm));
  }
#endif
  PetscCall(PetscHeaderDestroy(sf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Validate the set graph (debug builds only): every leaf's remote rank must be in
   [0, size), every remote root index >= 0, and every leaf location >= 0 */
static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
{
  PetscInt           i, nleaves;
  PetscMPIInt        size;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;

  PetscFunctionBegin;
  if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
  for (i = 0; i < nleaves; i++) {
    const PetscInt rank   = iremote[i].rank;
    const PetscInt remote = iremote[i].index;
    const PetscInt leaf   = ilocal ? ilocal[i] : i; /* NULL ilocal means contiguous leaves */
    PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
    PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
    PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication

  Collective

  Input Parameter:
. sf - star forest communication object

  Level: beginner

.seealso: `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
@*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS); /* idempotent */
  PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
  PetscCall(PetscSFCheckGraphValid_Private(sf));
  if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
  PetscTryTypeMethod(sf, SetUp);
  /* Install device-specific allocators matching the backend chosen in PetscSFCreate()/SetFromOptions() */
#if defined(PETSC_HAVE_CUDA)
  if (sf->backend == PETSCSF_BACKEND_CUDA) {
    sf->ops->Malloc = PetscSFMalloc_CUDA;
    sf->ops->Free   = PetscSFFree_CUDA;
  }
#endif
#if defined(PETSC_HAVE_HIP)
  if (sf->backend == PETSCSF_BACKEND_HIP) {
    sf->ops->Malloc = PetscSFMalloc_HIP;
    sf->ops->Free   = PetscSFFree_HIP;
  }
#endif

#if defined(PETSC_HAVE_KOKKOS)
  if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
    sf->ops->Malloc = PetscSFMalloc_Kokkos;
    sf->ops->Free   = PetscSFFree_Kokkos;
  }
#endif
  PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFSetFromOptions - set `PetscSF` options using the options database

  Logically Collective

  Input Parameter:
. sf - star forest

  Options Database Keys:
+ -sf_type                      - implementation type, see `PetscSFSetType()`
. -sf_rank_order                - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
. -sf_use_default_stream        - Assume callers of `PetscSF` computed the input root/leafdata with the default CUDA stream. `PetscSF` will also
                                  use the default stream to process data. Therefore, no stream synchronization is needed between `PetscSF` and its caller (default: true).
                                  If true, this option only works with `-use_gpu_aware_mpi 1`.
. -sf_use_stream_aware_mpi      - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false).
                                  If true, this option only works with `-use_gpu_aware_mpi 1`.
- -sf_backend <cuda,hip,kokkos> - Select the device backend `PetscSF` uses. Currently `PetscSF` has these backends: cuda, hip and kokkos.
                                  On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
                                  the only available is kokkos.

  Level: intermediate

.seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType deft;
  char        type[256];
  PetscBool   flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscObjectOptionsBegin((PetscObject)sf);
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
  PetscCall(PetscSFSetType(sf, flg ? type : deft));
  PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
  PetscCall(PetscOptionsBool("-sf_monitor", "monitor the MPI communication in sf", NULL, sf->monitor, &sf->monitor, NULL));
#if defined(PETSC_HAVE_DEVICE)
  {
    char      backendstr[32] = {0};
    PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
    /* Change the defaults set in PetscSFCreate() with command line options */
    PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
    PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
    PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
    /* Case-insensitive match of the requested backend name */
    PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
    PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
    PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
    if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
    else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
    else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
    else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
#elif defined(PETSC_HAVE_KOKKOS)
    PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
#endif

#if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
    if (sf->use_stream_aware_mpi) {
      MPI_Info info;

      /* Bind PETSc's default CUDA stream to an MPIX stream and build a stream-aware communicator */
      PetscCallMPI(MPI_Info_create(&info));
      PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t"));
      PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream)));
      PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream));
      PetscCallMPI(MPI_Info_free(&info));
      PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm));
    }
#endif
  }
#endif
  PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

  Logically Collective

  Input Parameters:
+ sf  - star forest
- flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic)

  Level: advanced

.seealso: `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveBool(sf, flg, 2);
  /* The multi-SF caches the ordering, so this must be set before it is built */
  PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFSetGraph - Set a parallel star forest

  Collective

  Input Parameters:
+ sf - star forest
.
nroots     - number of root vertices on the current process (these are possible targets for other process to attach leaves)
. nleaves    - number of leaf vertices on the current process, each of these references a root on any process
. ilocal     - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage (locations must be >= 0, enforced
               during setup in debug mode)
. localmode  - copy mode for `ilocal`
. iremote    - remote locations of root vertices for each leaf on the current process, length is `nleaves`
               (locations must be >= 0, enforced during setup in debug mode)
- remotemode - copy mode for `iremote`

  Level: intermediate

  Notes:
  Leaf indices in `ilocal` must be unique, otherwise an error occurs.

  Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics.
  In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
  PETSc might modify the respective array;
  if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`.
  Only if `PETSC_COPY_VALUES` is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).

  Fortran Notes:
  In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`.

  Developer Notes:
  We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
  This also allows to compare leaf sets of two `PetscSF`s easily.

.seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt ilocal[], PetscCopyMode localmode, PetscSFNode iremote[], PetscCopyMode remotemode)
{
  PetscBool unique, contiguous;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4);
  if (nleaves > 0) PetscAssertPointer(iremote, 6);
  PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
  PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
  /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
   * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
  PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
  PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    PetscCall(PetscSFReset(sf));
  }

  PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));
  if (PetscDefined(USE_DEBUG)) {
    PetscMPIInt size;

    /* rank -1 is tolerated here (leaf not attached); full validation happens in PetscSFSetUp() */
    PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
    for (PetscInt i = 0; i < nleaves; i++) PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values");
  }

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  if (localmode == PETSC_COPY_VALUES && ilocal) {
    PetscInt *tlocal = NULL;

    /* Work on a private copy so the caller's array stays intact */
    PetscCall(PetscMalloc1(nleaves, &tlocal));
    PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
    ilocal = tlocal;
  }
  if (remotemode == PETSC_COPY_VALUES) {
    PetscSFNode *tremote = NULL;

    PetscCall(PetscMalloc1(nleaves, &tremote));
    PetscCall(PetscArraycpy(tremote, iremote, nleaves));
    iremote = tremote;
  }

  if (nleaves && ilocal) {
    PetscSFNode work;

    /* Sort leaves (carrying iremote along) to detect duplicates/contiguity and find min/max leaf */
    PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
    PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
    unique = PetscNot(unique);
    PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
    sf->minleaf = ilocal[0];
    sf->maxleaf = ilocal[nleaves - 1];
    contiguous  = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
  } else {
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
    unique      = PETSC_TRUE;
    contiguous  = PETSC_TRUE;
  }

  if (contiguous) {
    /* Contiguous leaves need no index array; drop ilocal (free it unless the caller owns it) */
    if (localmode == PETSC_USE_POINTER) {
      ilocal = NULL;
    } else {
      PetscCall(PetscFree(ilocal));
    }
  }
  sf->mine = ilocal;
  if (localmode == PETSC_USE_POINTER) {
    sf->mine_alloc = NULL; /* caller retains ownership */
  } else {
    sf->mine_alloc = ilocal;
  }
  if (PetscDefined(USE_DEBUG)) {
    PetscMPIInt size;

    /* Re-check after the sort permuted iremote */
    PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
    for (PetscInt i = 0; i < nleaves; i++) PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values");
  }
  sf->remote = iremote;
  if (remotemode == PETSC_USE_POINTER) {
    sf->remote_alloc = NULL; /* caller retains ownership */
  } else {
    sf->remote_alloc = iremote;
  }
  PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern
  Collective

  Input Parameters:
+ sf      - The `PetscSF`
. map     - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`)
- pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`

  Level: intermediate

  Notes:
  It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `x` and its `PetscLayout` is `map`.
  `n` and `N` are the local and global sizes of `x` respectively.

  With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to
  sequential vectors `y` on all MPI processes.

  With `PETSCSF_PATTERN_GATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to a
  sequential vector `y` on rank 0.

  In above cases, entries of `x` are roots and entries of `y` are leaves.

  With `PETSCSF_PATTERN_ALLTOALL`, map is insignificant. Suppose NP is size of `sf`'s communicator. The routine
  creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
  of rank j. Here 0 <= i,j < NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that it does
  not mean one can not send multiple items. One just needs to create a new MPI datatype for the multiple data
  items with `MPI_Type_contiguous` and use that as the <unit> argument in SF routines.

  In this case, roots and leaves are symmetric.

.seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
{
  MPI_Comm    comm;
  PetscInt    n, N, res[2];
  PetscMPIInt rank, size;
  PetscSFType type;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2);
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    PetscInt sizei = size; /* widen before squaring to avoid PetscMPIInt overflow */

    type = PETSCSFALLTOALL;
    PetscCall(PetscLayoutCreate(comm, &sf->map));
    PetscCall(PetscLayoutSetLocalSize(sf->map, size));
    PetscCall(PetscLayoutSetSize(sf->map, PetscSqr(sizei)));
    PetscCall(PetscLayoutSetUp(sf->map));
  } else {
    PetscCall(PetscLayoutGetLocalSize(map, &n));
    PetscCall(PetscLayoutGetSize(map, &N));
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    PetscCall(PetscLayoutReference(map, &sf->map));
  }
  PetscCall(PetscSFSetType(sf, type));

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
  */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots  = n;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    sf->nleaves = rank ? 0 : N; /* only rank 0 has leaves */
    sf->nroots  = n;
    sf->nranks  = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves = size;
    sf->nroots  = size;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map

  Collective

  Input Parameter:
. sf - star forest to invert

  Output Parameter:
. isf - inverse of `sf`

  Level: advanced

  Notes:
  All roots must have degree 1.

  The local space may be a permutation, but cannot be sparse.

.seealso: `PetscSF`, `PetscSFType`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
{
  PetscMPIInt     rank;
  PetscInt        i, nroots, nleaves, maxlocal, count, *newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots, *leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscAssertPointer(isf, 2);

  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
  maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
  PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
  /* Each leaf names itself (rank, index) ... */
  for (i = 0; i < maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* ... and roots start as "unreached" sentinels */
  for (i = 0; i < nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  /* Push each leaf's identity to its root; roots then know their (unique) leaf */
  PetscCall(PetscSFReduceBegin(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));

  /* Check whether our leaves are sparse */
  for (i = 0, count = 0; i < nroots; i++)
    if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL; /* dense: contiguous leaf storage in the inverse */
  else {
    /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    PetscCall(PetscMalloc1(count, &newilocal));
    for (i = 0, count = 0; i < nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
  PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
  PetscCall(PetscFree2(roots, leaves));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph

  Collective

  Input Parameters:
+ sf  - communication object to duplicate
- opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)

  Output Parameter:
. newsf - new communication object

  Level: beginner

.seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
{
  PetscSFType  type;
  MPI_Datatype dtype = MPIU_SCALAR;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveEnum(sf, opt, 2);
  PetscAssertPointer(newsf, 3);
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
  PetscCall(PetscSFGetType(sf, &type));
  if (type) PetscCall(PetscSFSetType(*newsf, type));
  (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf, 1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt           nroots, nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
      /* PETSC_COPY_VALUES guarantees the (const) source arrays stay intact; casts drop const only */
      PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
    } else {
      PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
    }
  }
  /* Since oldtype is committed, so is newtype, according to MPI */
  if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
  (*newsf)->vscat.bs     = sf->vscat.bs;
  (*newsf)->vscat.unit   = dtype;
  (*newsf)->vscat.to_n   = sf->vscat.to_n;
  (*newsf)->vscat.from_n = sf->vscat.from_n;
  /* Do not copy lsf. Build it on demand since it is rarely used */

#if defined(PETSC_HAVE_DEVICE)
  (*newsf)->backend              = sf->backend;
  (*newsf)->unknown_input_stream = sf->unknown_input_stream;
  (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGetGraph - Get the graph specifying a parallel star forest

  Not Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ nroots  - number of root vertices on the current process (these are possible targets for other process to attach leaves)
. nleaves - number of leaf vertices on the current process, each of these references a root on any process
.
ilocal - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage) 792 - iremote - remote locations of root vertices for each leaf on the current process 793 794 Level: intermediate 795 796 Notes: 797 We are not currently requiring that the graph is set, thus returning `nroots` = -1 if it has not been set yet 798 799 The returned `ilocal` and `iremote` might contain values in different order than the input ones in `PetscSFSetGraph()` 800 801 Fortran Note: 802 Use `PetscSFRestoreGraph()` when access to the arrays is no longer needed 803 804 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()` 805 @*/ 806 PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt *ilocal[], const PetscSFNode *iremote[]) 807 { 808 PetscFunctionBegin; 809 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 810 if (sf->ops->GetGraph) { 811 PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote)); 812 } else { 813 if (nroots) *nroots = sf->nroots; 814 if (nleaves) *nleaves = sf->nleaves; 815 if (ilocal) *ilocal = sf->mine; 816 if (iremote) *iremote = sf->remote; 817 } 818 PetscFunctionReturn(PETSC_SUCCESS); 819 } 820 821 /*@ 822 PetscSFGetLeafRange - Get the active leaf ranges 823 824 Not Collective 825 826 Input Parameter: 827 . sf - star forest 828 829 Output Parameters: 830 + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves. 831 - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves. 

  Level: developer

.seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  /* Both outputs are optional; the cached bounds are reported directly */
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database

  Collective

  Input Parameters:
+ A - the star forest
. obj - Optional object that provides the prefix for the option names
- name - command line option

  Level: intermediate

  Note:
  See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat`

.seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1);
  PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFView - view a star forest

  Collective

  Input Parameters:
+ sf - star forest
- viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`

  Level: beginner

.seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
{
  PetscBool         isascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* Default to stdout on the SF's communicator when no viewer is given */
  if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  PetscCheckSameComm(sf, 1,
viewer, 2);
  /* Setting up fills the rank/offset tables consumed by the detailed output below */
  if (sf->graphset) PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt    j;

    PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
    PetscCall(PetscViewerASCIIPushTab(viewer));
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
        PetscCall(PetscViewerASCIIPopTab(viewer));
        PetscFunctionReturn(PETSC_SUCCESS);
      }
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%d\n", rank, sf->nroots, sf->nleaves, sf->nranks));
      /* sf->mine == NULL means leaves are contiguous, i.e. leaf i lives at location i */
      for (PetscInt i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerGetFormat(viewer, &format));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks, *perm;

        /* Sort a copy of the rank list so output is ordered by rank; perm maps sorted slots back to original ones */
        PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
        PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
        for (PetscMPIInt i = 0; i < sf->nranks; i++) perm[i] = i;
        PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
        for (PetscMPIInt ii = 0; ii < sf->nranks; ii++) {
          PetscMPIInt i = perm[ii];

          PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
          for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
        }
        PetscCall(PetscFree2(tmpranks, perm));
      }
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
    }
    PetscCall(PetscViewerASCIIPopTab(viewer));
  }
  PetscTryTypeMethod(sf, View, viewer);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

  Not Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ nranks - number of ranks referenced by local part
. ranks - [`nranks`] array of ranks
. roffset - [`nranks`+1] offset in `rmine`/`rremote` for each rank
.
rmine - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote rank, or `NULL` 953 - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote rank, or `NULL` 954 955 Level: developer 956 957 .seealso: `PetscSF`, `PetscSFGetLeafRanks()` 958 @*/ 959 PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscMPIInt *nranks, const PetscMPIInt *ranks[], const PetscInt *roffset[], const PetscInt *rmine[], const PetscInt *rremote[]) 960 { 961 PetscFunctionBegin; 962 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 963 PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 964 if (sf->ops->GetRootRanks) { 965 PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote); 966 } else { 967 /* The generic implementation */ 968 if (nranks) *nranks = sf->nranks; 969 if (ranks) *ranks = sf->ranks; 970 if (roffset) *roffset = sf->roffset; 971 if (rmine) *rmine = sf->rmine; 972 if (rremote) *rremote = sf->rremote; 973 } 974 PetscFunctionReturn(PETSC_SUCCESS); 975 } 976 977 /*@C 978 PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 979 980 Not Collective 981 982 Input Parameter: 983 . sf - star forest 984 985 Output Parameters: 986 + niranks - number of leaf ranks referencing roots on this process 987 . iranks - [`niranks`] array of ranks 988 . 
ioffset - [`niranks`+1] offset in `irootloc` for each rank 989 - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf rank 990 991 Level: developer 992 993 .seealso: `PetscSF`, `PetscSFGetRootRanks()` 994 @*/ 995 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscMPIInt *niranks, const PetscMPIInt *iranks[], const PetscInt *ioffset[], const PetscInt *irootloc[]) 996 { 997 PetscFunctionBegin; 998 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 999 PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 1000 if (sf->ops->GetLeafRanks) { 1001 PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc); 1002 } else { 1003 PetscSFType type; 1004 PetscCall(PetscSFGetType(sf, &type)); 1005 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 1006 } 1007 PetscFunctionReturn(PETSC_SUCCESS); 1008 } 1009 1010 static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list) 1011 { 1012 PetscInt i; 1013 for (i = 0; i < n; i++) { 1014 if (needle == list[i]) return PETSC_TRUE; 1015 } 1016 return PETSC_FALSE; 1017 } 1018 1019 /*@C 1020 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations. 

  Collective

  Input Parameters:
+ sf - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
- dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)

  Level: developer

.seealso: `PetscSF`, `PetscSFGetRootRanks()`
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
{
  PetscHMapI    table;
  PetscHashIter pos;
  PetscMPIInt   size, groupsize, *groupranks, *ranks;
  PetscInt     *rcount;
  PetscInt      irank, sfnrank, ranksi;
  PetscMPIInt   i, orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
  /* Count how many of our leaves reference each remote root rank: key = rank, value = leaf count */
  PetscCall(PetscHMapICreateWithSize(10, &table));
  for (i = 0; i < sf->nleaves; i++) {
    /* Log 1-based rank */
    PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
  }
  PetscCall(PetscHMapIGetSize(table, &sfnrank));
  PetscCall(PetscMPIIntCast(sfnrank, &sf->nranks));
  PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
  PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
  /* Flatten the hash map into the parallel arrays ranks[]/rcount[] */
  PetscHashIterBegin(table, pos);
  for (i = 0; i < sf->nranks; i++) {
    PetscHashIterGetKey(table, pos, ranksi);
    PetscCall(PetscMPIIntCast(ranksi, &ranks[i]));
    PetscHashIterGetVal(table, pos, rcount[i]);
    PetscHashIterNext(table, pos);
    ranks[i]--; /* Convert back to 0-based */
  }
  PetscCall(PetscHMapIDestroy(&table));

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group    group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;

    /* Translate the distinguished group's ranks into the SF communicator's numbering */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
    PetscCall(PetscMalloc1(groupsize, &dgroupranks));
    PetscCall(PetscMalloc1(groupsize, &groupranks));
    for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
    if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree(dgroupranks));
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
    for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i], groupsize, groupranks)) break;
    }
    for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscMPIInt tmprank;
      PetscInt    tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  PetscCall(PetscFree(groupranks));
  /* Sort each partition by rank so PetscFindMPIInt() can be used on it below */
  PetscCall(PetscSortMPIIntWithIntArray(sf->ndranks, ranks, rcount));
  if (rcount) PetscCall(PetscSortMPIIntWithIntArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
  /* Build CSR-style offsets; rcount[] is reset and reused as an insertion cursor per rank */
  sf->roffset[0] = 0;
  for (i = 0; i < sf->nranks; i++) {
    PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
    sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
    rcount[i]          = 0;
  }
  for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
    /* short circuit: reuse irank while consecutive leaves target the same rank */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      PetscCall(PetscMPIIntCast(sf->remote[i].rank, &orank));
      PetscCall(PetscFindMPIInt(orank, sf->ndranks, sf->ranks, &irank));
      if (irank < 0) {
        /* Not a distinguished rank; search the non-distinguished partition */
        PetscCall(PetscFindMPIInt(orank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
        if (irank >= 0) irank += sf->ndranks;
      }
    }
    PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %d in array", orank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  PetscCall(PetscFree2(rcount, ranks));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGetGroups - gets incoming and outgoing process groups

  Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ incoming - group of origin processes for incoming edges (leaves that reference my roots)
- outgoing - group of destination processes for outgoing edges (roots that I reference)

  Level: developer

.seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
{
  MPI_Group group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) { /* Groups are cached on first use */
    PetscInt        i;
    const PetscInt *indegree;
    PetscMPIInt     rank, *outranks, *inranks, indegree0;
    PetscSFNode    *remote;
    PetscSF         bgcount;

    /* Compute the number of incoming ranks: an auxiliary SF with one leaf per root rank */
    PetscCall(PetscMalloc1(sf->nranks, &remote));
    for (i = 0; i < sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
    PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
    PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
    PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
    /* Enumerate the incoming ranks: every process gathers the ranks that reference it */
    PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
    for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
    PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
    PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCall(PetscMPIIntCast(indegree[0], &indegree0));
    PetscCallMPI(MPI_Group_incl(group, indegree0, inranks, &sf->ingroup));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree2(inranks, outranks));
    PetscCall(PetscSFDestroy(&bgcount));
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* Outgoing ranks are just sf->ranks, already available locally */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
    PetscCallMPI(MPI_Group_free(&group));
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks

  Collective

  Input Parameter:
. sf - star forest

  Output Parameter:
.
rsf - the star forest with a single root per process to perform communications 1202 1203 Level: developer 1204 1205 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()` 1206 @*/ 1207 PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf) 1208 { 1209 PetscFunctionBegin; 1210 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1211 PetscAssertPointer(rsf, 2); 1212 if (!sf->rankssf) { 1213 PetscSFNode *rremotes; 1214 const PetscMPIInt *ranks; 1215 PetscMPIInt nranks; 1216 1217 PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL)); 1218 PetscCall(PetscMalloc1(nranks, &rremotes)); 1219 for (PetscInt i = 0; i < nranks; i++) { 1220 rremotes[i].rank = ranks[i]; 1221 rremotes[i].index = 0; 1222 } 1223 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf)); 1224 PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER)); 1225 } 1226 *rsf = sf->rankssf; 1227 PetscFunctionReturn(PETSC_SUCCESS); 1228 } 1229 1230 /*@ 1231 PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters 1232 1233 Collective 1234 1235 Input Parameter: 1236 . sf - star forest that may contain roots with 0 or with more than 1 vertex 1237 1238 Output Parameter: 1239 . multi - star forest with split roots, such that each root has degree exactly 1 1240 1241 Level: developer 1242 1243 Note: 1244 In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi 1245 directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming 1246 edge, it is a candidate for future optimization that might involve its removal. 

.seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscAssertPointer(multi, 2);
  if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
    *multi           = sf->multi;
    sf->multi->multi = sf->multi;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!sf->multi) { /* Built lazily and cached on the SF */
    const PetscInt *indegree;
    PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
    PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
    PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
    /* inoffset[] holds CSR-style offsets of each root's block of slots in the multi-root space */
    PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
    inoffset[0] = 0;
    for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
    for (i = 0; i < maxlocal; i++) outones[i] = 1;
    /* Fetch-and-add of ones hands each leaf a distinct slot index under its root */
    PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
    PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
    for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
      for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
    }
    PetscCall(PetscMalloc1(sf->nleaves, &remote));
    for (i = 0; i < sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
    sf->multi->multi = sf->multi;
    PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
    if (sf->rankorder) { /* Sort the ranks */
      PetscMPIInt  rank;
      PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
      PetscSFNode *newremote;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
      for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
      PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
      /* Every leaf reports its owning rank to its root slot */
      for (i = 0; i < maxlocal; i++) outranks[i] = rank;
      PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
      PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i = 0; i < sf->nroots; i++) {
        PetscInt j;
        for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
        PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset));
        for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
      PetscCall(PetscMalloc1(sf->nleaves, &newremote));
      for (i = 0; i < sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ?
sf->mine[i] : i];
      }
      PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
      PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
    }
    PetscCall(PetscFree3(inoffset, outones, outoffset));
  }
  *multi = sf->multi;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices

  Collective

  Input Parameters:
+ sf - original star forest
. nselected - number of selected roots on this process
- selected - indices of the selected roots on this process

  Output Parameter:
. esf - new star forest

  Level: advanced

  Note:
  To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
  be done by calling PetscSFGetGraph().

.seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf)
{
  PetscInt           i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
  const PetscInt    *ilocal;
  signed char       *rootdata, *leafdata, *leafmem;
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  MPI_Comm           comm;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  if (nselected) PetscAssertPointer(selected, 3);
  PetscAssertPointer(esf, 4);

  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));

  if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
    PetscBool dups;
    PetscCall(PetscCheckDupsInt(nselected, selected, &dups));
    PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups");
    for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
  }

  if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
  else {
    /* A generic version of creating embedded sf */
    PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
    maxlocal = maxleaf - minleaf + 1;
    PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
    /* Shift leafdata so it can be indexed directly by (possibly nonzero-based) leaf indices */
    leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf);
    /* Tag selected roots and bcast to leaves */
    for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
    PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));

    /* Build esf with leaves that are still connected */
    esf_nleaves = 0;
    for (i = 0; i < nleaves; i++) {
      j = ilocal ? ilocal[i] : i;
      /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
         with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
      */
      esf_nleaves += (leafdata[j] ? 1 : 0);
    }
    PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
    PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
    for (i = n = 0; i < nleaves; i++) {
      j = ilocal ?
ilocal[i] : i; 1392 if (leafdata[j]) { 1393 new_ilocal[n] = j; 1394 new_iremote[n].rank = iremote[i].rank; 1395 new_iremote[n].index = iremote[i].index; 1396 ++n; 1397 } 1398 } 1399 PetscCall(PetscSFCreate(comm, esf)); 1400 PetscCall(PetscSFSetFromOptions(*esf)); 1401 PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1402 PetscCall(PetscFree2(rootdata, leafmem)); 1403 } 1404 PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0)); 1405 PetscFunctionReturn(PETSC_SUCCESS); 1406 } 1407 1408 /*@C 1409 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices 1410 1411 Collective 1412 1413 Input Parameters: 1414 + sf - original star forest 1415 . nselected - number of selected leaves on this process 1416 - selected - indices of the selected leaves on this process 1417 1418 Output Parameter: 1419 . newsf - new star forest 1420 1421 Level: advanced 1422 1423 .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1424 @*/ 1425 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf) 1426 { 1427 const PetscSFNode *iremote; 1428 PetscSFNode *new_iremote; 1429 const PetscInt *ilocal; 1430 PetscInt i, nroots, *leaves, *new_ilocal; 1431 MPI_Comm comm; 1432 1433 PetscFunctionBegin; 1434 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1435 PetscSFCheckGraphSet(sf, 1); 1436 if (nselected) PetscAssertPointer(selected, 3); 1437 PetscAssertPointer(newsf, 4); 1438 1439 /* Uniq selected[] and put results in leaves[] */ 1440 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1441 PetscCall(PetscMalloc1(nselected, &leaves)); 1442 PetscCall(PetscArraycpy(leaves, selected, nselected)); 1443 PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves)); 1444 PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, 
PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves);

  /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
  if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
  else {
    /* Generic fallback: copy the selected edges verbatim into a fresh graph */
    PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote));
    PetscCall(PetscMalloc1(nselected, &new_ilocal));
    PetscCall(PetscMalloc1(nselected, &new_iremote));
    for (i = 0; i < nselected; ++i) {
      const PetscInt l     = leaves[i];
      new_ilocal[i]        = ilocal ? ilocal[l] : l;
      new_iremote[i].rank  = iremote[l].rank;
      new_iremote[i].index = iremote[l].index;
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf));
    PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
  }
  PetscCall(PetscFree(leaves));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()`

  Collective

  Input Parameters:
+ sf - star forest on which to communicate
. unit - data type associated with each node
. rootdata - buffer to broadcast
- op - operation to use for reduction

  Output Parameter:
. leafdata - buffer to be reduced with values from each leaf's respective root

  Level: intermediate

  Note:
  When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
  are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should
  use `PetscSFBcastWithMemTypeBegin()` instead.
1485 1486 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()` 1487 @*/ 1488 PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1489 { 1490 PetscMemType rootmtype, leafmtype; 1491 1492 PetscFunctionBegin; 1493 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1494 PetscCall(PetscSFSetUp(sf)); 1495 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1496 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1497 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1498 PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 1499 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1500 PetscFunctionReturn(PETSC_SUCCESS); 1501 } 1502 1503 /*@C 1504 PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call 1505 to `PetscSFBcastEnd()` 1506 1507 Collective 1508 1509 Input Parameters: 1510 + sf - star forest on which to communicate 1511 . unit - data type associated with each node 1512 . rootmtype - memory type of rootdata 1513 . rootdata - buffer to broadcast 1514 . leafmtype - memory type of leafdata 1515 - op - operation to use for reduction 1516 1517 Output Parameter: 1518 . 
leafdata - buffer to be reduced with values from each leaf's respective root

  Level: intermediate

.seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
@*/
PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  /* SF-level logging is skipped when the call is made through the SF's VecScatter interface */
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()`

  Collective

  Input Parameters:
+ sf - star forest
. unit - data type
. rootdata - buffer to broadcast
- op - operation to use for reduction

  Output Parameter:
.
leafdata - buffer to be reduced with values from each leaf's respective root

  Level: intermediate

.seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
@*/
PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* SF-level logging is skipped when the call is made through the SF's VecScatter interface */
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
  PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()`

  Collective

  Input Parameters:
+ sf - star forest
. unit - data type
. leafdata - values to reduce
- op - reduction operation

  Output Parameter:
. rootdata - result of reduction of values from all leaves of each root

  Level: intermediate

  Note:
  When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
  are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should
  use `PetscSFReduceWithMemTypeBegin()` instead.
1583 1584 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()` 1585 @*/ 1586 PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1587 { 1588 PetscMemType rootmtype, leafmtype; 1589 1590 PetscFunctionBegin; 1591 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1592 PetscCall(PetscSFSetUp(sf)); 1593 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1594 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1595 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1596 PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1597 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1598 PetscFunctionReturn(PETSC_SUCCESS); 1599 } 1600 1601 /*@C 1602 PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()` 1603 1604 Collective 1605 1606 Input Parameters: 1607 + sf - star forest 1608 . unit - data type 1609 . leafmtype - memory type of leafdata 1610 . leafdata - values to reduce 1611 . rootmtype - memory type of rootdata 1612 - op - reduction operation 1613 1614 Output Parameter: 1615 . 
rootdata - result of reduction of values from all leaves of each root 1616 1617 Level: intermediate 1618 1619 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()` 1620 @*/ 1621 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) 1622 { 1623 PetscFunctionBegin; 1624 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1625 PetscCall(PetscSFSetUp(sf)); 1626 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1627 PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1628 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1629 PetscFunctionReturn(PETSC_SUCCESS); 1630 } 1631 1632 /*@C 1633 PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()` 1634 1635 Collective 1636 1637 Input Parameters: 1638 + sf - star forest 1639 . unit - data type 1640 . leafdata - values to reduce 1641 - op - reduction operation 1642 1643 Output Parameter: 1644 . 
rootdata - result of reduction of values from all leaves of each root 1645 1646 Level: intermediate 1647 1648 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()` 1649 @*/ 1650 PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1651 { 1652 PetscFunctionBegin; 1653 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1654 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1655 PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op); 1656 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1657 PetscFunctionReturn(PETSC_SUCCESS); 1658 } 1659 1660 /*@C 1661 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, 1662 to be completed with `PetscSFFetchAndOpEnd()` 1663 1664 Collective 1665 1666 Input Parameters: 1667 + sf - star forest 1668 . unit - data type 1669 . leafdata - leaf values to use in reduction 1670 - op - operation to use for reduction 1671 1672 Output Parameters: 1673 + rootdata - root values to be updated, input state is seen by first process to perform an update 1674 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1675 1676 Level: advanced 1677 1678 Note: 1679 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1680 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1681 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1682 integers. 
1683 1684 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()` 1685 @*/ 1686 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1687 { 1688 PetscMemType rootmtype, leafmtype, leafupdatemtype; 1689 1690 PetscFunctionBegin; 1691 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1692 PetscCall(PetscSFSetUp(sf)); 1693 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1694 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1695 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1696 PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype)); 1697 PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1698 PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 1699 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1700 PetscFunctionReturn(PETSC_SUCCESS); 1701 } 1702 1703 /*@C 1704 PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by 1705 applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()` 1706 1707 Collective 1708 1709 Input Parameters: 1710 + sf - star forest 1711 . unit - data type 1712 . rootmtype - memory type of rootdata 1713 . leafmtype - memory type of leafdata 1714 . leafdata - leaf values to use in reduction 1715 . leafupdatemtype - memory type of leafupdate 1716 - op - operation to use for reduction 1717 1718 Output Parameters: 1719 + rootdata - root values to be updated, input state is seen by first process to perform an update 1720 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1721 1722 Level: advanced 1723 1724 Note: 1725 See `PetscSFFetchAndOpBegin()` for more details. 
1726 1727 .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()` 1728 @*/ 1729 PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op) 1730 { 1731 PetscFunctionBegin; 1732 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1733 PetscCall(PetscSFSetUp(sf)); 1734 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1735 PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1736 PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 1737 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1738 PetscFunctionReturn(PETSC_SUCCESS); 1739 } 1740 1741 /*@C 1742 PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()` 1743 to fetch values from roots and update atomically by applying operation using my leaf value 1744 1745 Collective 1746 1747 Input Parameters: 1748 + sf - star forest 1749 . unit - data type 1750 . 
leafdata - leaf values to use in reduction 1751 - op - operation to use for reduction 1752 1753 Output Parameters: 1754 + rootdata - root values to be updated, input state is seen by first process to perform an update 1755 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1756 1757 Level: advanced 1758 1759 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()` 1760 @*/ 1761 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1762 { 1763 PetscFunctionBegin; 1764 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1765 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0)); 1766 PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op); 1767 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0)); 1768 PetscFunctionReturn(PETSC_SUCCESS); 1769 } 1770 1771 /*@C 1772 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()` 1773 1774 Collective 1775 1776 Input Parameter: 1777 . sf - star forest 1778 1779 Output Parameter: 1780 . degree - degree of each root vertex 1781 1782 Level: advanced 1783 1784 Note: 1785 The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it. 

.seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt *degree[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscAssertPointer(degree, 2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf - sf->minleaf + 1;
    PetscCall(PetscMalloc1(nroots, &sf->degree));
    PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i = 0; i < nroots; i++) sf->degree[i] = 0;
    for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
    /* sum a vector of ones from the leaves; degreetmp[0] corresponds to leaf index minleaf,
       hence the pointer shift so the reduce indexes it by absolute leaf number */
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
  }
  /* result is only available after PetscSFComputeDegreeEnd() */
  *degree = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`

  Collective

  Input Parameter:
. sf - star forest

  Output Parameter:
. degree - degree of each root vertex

  Level: developer

  Note:
  The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.

.seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt *degree[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscAssertPointer(degree, 2);
  if (!sf->degreeknown) {
    PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
    PetscCall(PetscFree(sf->degreetmp));
    /* cache the result; subsequent Begin/End pairs return it without communication */
    sf->degreeknown = PETSC_TRUE;
  }
  *degree = sf->degree;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`).
  Each multi-root is assigned index of the corresponding original root.

  Collective

  Input Parameters:
+ sf - star forest
- degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()`

  Output Parameters:
+ nMultiRoots - (optional) number of multi-roots (roots of multi-`PetscSF`)
- multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots`

  Level: developer

  Note:
  The returned array `multiRootsOrigNumbering` is newly allocated and should be destroyed with `PetscFree()` when no longer needed.

.seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
@*/
PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
{
  PetscSF  msf;
  PetscInt k = 0, nroots, nmroots;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  if (nroots) PetscAssertPointer(degree, 2);
  if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3);
  PetscAssertPointer(multiRootsOrigNumbering, 4);
  PetscCall(PetscSFGetMultiSF(sf, &msf));
  PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
  PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
  /* each original root i contributes degree[i] consecutive multi-roots, all mapped back to i */
  for (PetscInt i = 0; i < nroots; i++) {
    if (!degree[i]) continue;
    for (PetscInt j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
  }
  PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail");
  if (nMultiRoots) *nMultiRoots = nmroots;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`

  Collective

  Input Parameters:
+ sf - star forest
. unit - data type
- leafdata - leaf data to gather to roots

  Output Parameter:
. multirootdata - root buffer to gather into, amount of space per root is equal to its degree

  Level: intermediate

.seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
@*/
PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  /* gather = reduce with MPI_REPLACE over the multi-SF, whose roots are one slot per (root, leaf) edge */
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()`

  Collective

  Input Parameters:
+ sf - star forest
. unit - data type
- leafdata - leaf data to gather to roots

  Output Parameter:
. multirootdata - root buffer to gather into, amount of space per root is equal to its degree

  Level: intermediate

.seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
@*/
PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()`

  Collective

  Input Parameters:
+ sf - star forest
. unit - data type
- multirootdata - root buffer to send to each leaf, one unit of data per leaf

  Output Parameter:
. leafdata - leaf data to be updated with personal data from each respective root

  Level: intermediate

.seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  /* scatter = broadcast with MPI_REPLACE over the multi-SF */
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`

  Collective

  Input Parameters:
+ sf - star forest
. unit - data type
- multirootdata - root buffer to send to each leaf, one unit of data per leaf

  Output Parameter:
. leafdata - leaf data to be updated with personal data from each respective root

  Level: intermediate

.seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Verify (debug builds only) that no two leaves of sf share the same local index */
static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
{
  PetscInt        i, n, nleaves;
  const PetscInt *ilocal = NULL;
  PetscHSetI      seen;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) {
    PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
    PetscCall(PetscHSetICreate(&seen));
    for (i = 0; i < nleaves; i++) {
      const PetscInt leaf = ilocal ? ilocal[i] : i; /* ilocal == NULL means contiguous leaves 0..nleaves-1 */
      PetscCall(PetscHSetIAdd(seen, leaf));
    }
    /* duplicates collapse in the hash set, so a size mismatch means repeated leaves */
    PetscCall(PetscHSetIGetSize(seen, &n));
    PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
    PetscCall(PetscHSetIDestroy(&seen));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view

  Input Parameters:
+ sfA - The first `PetscSF`
- sfB - The second `PetscSF`

  Output Parameter:
. sfBA - The composite `PetscSF`

  Level: developer

  Notes:
  Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots.

  `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds
  a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected
  nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a
  `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfB`, on connected nodes.

.seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA, *remotePointsB;
  PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
  const PetscInt    *localPointsA, *localPointsB;
  PetscInt          *localPointsBA;
  PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
  PetscBool          denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sfA, 1);
  PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
  PetscSFCheckGraphSet(sfB, 2);
  PetscCheckSameComm(sfA, 1, sfB, 2);
  PetscAssertPointer(sfBA, 3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
  /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size
     numRootsB; otherwise, garbage will be broadcasted.
     Example (comm size = 1):
     sfA: 0 <- (0, 0)
     sfB: 100 <- (0, 0)
          101 <- (0, 1)
     Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target
     of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would
     receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on
     remotePointsA; if not recast, point 101 would receive a garbage value. */
  PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
  /* (-1, -1) is the sentinel for "no root of sfA reaches this point" */
  for (i = 0; i < numRootsB; i++) {
    reorderedRemotePointsA[i].rank  = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  for (i = 0; i < numLeavesA; i++) {
    PetscInt localp = localPointsA ? localPointsA[i] : i;

    if (localp >= numRootsB) continue; /* leaf of sfA outside sfB's root space cannot connect */
    reorderedRemotePointsA[localp] = remotePointsA[i];
  }
  remotePointsA = reorderedRemotePointsA;
  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
  for (i = 0; i < maxleaf - minleaf + 1; i++) {
    leafdataB[i].rank  = -1;
    leafdataB[i].index = -1;
  }
  /* push each sfA root id down through sfB; leafdataB is indexed from minleaf, hence the offset */
  PetscCall(PetscSFBcastBegin(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
  PetscCall(PetscFree(reorderedRemotePointsA));

  /* if every leaf of sfB is connected and leaves are contiguous, reuse leafdataB directly */
  denseB = (PetscBool)!localPointsB;
  for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership of leafdataB transfers to the new SF below */
  } else {
    PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
    PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
    for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l - minleaf].rank == -1) continue; /* unconnected leaf: drop it */
      remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
      localPointsBA[numLeavesBA]  = l;
      numLeavesBA++;
    }
    PetscCall(PetscFree(leafdataB));
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one

  Input Parameters:
+ sfA - The first `PetscSF`
- sfB - The second `PetscSF`

  Output Parameter:
. sfBA - The composite `PetscSF`.

  Level: developer

  Notes:
  Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
  second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected.

  `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds
  a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected
  roots are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()`
  on `sfA`, then
  a `PetscSFReduceBegin()`/`PetscSFReduceEnd()` on `sfB`, on connected roots.

.seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA, *remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA, *localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
  MPI_Op             op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sfA, 1);
  PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
  PetscSFCheckGraphSet(sfB, 2);
  PetscCheckSameComm(sfA, 1, sfB, 2);
  PetscAssertPointer(sfBA, 3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
  */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
  if (iswin) op = MPI_REPLACE;
#endif

  /* scatter sfA's remote roots onto sfB's leaf space, (-1, -1) marking leaves sfA does not reach */
  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
  for (i = 0; i < maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank  = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i = 0; i < numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    for (i = 0; i < numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
  PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
  for (i = 0; i < numRootsB; i++) {
    remotePointsBA[i].rank  = -1;
    remotePointsBA[i].index = -1;
  }

  /* reduce through sfB's inverse direction (leaves -> roots); buffer indexed from minleaf */
  PetscCall(PetscSFReduceBegin(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
  PetscCall(PetscSFReduceEnd(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
  PetscCall(PetscFree(reorderedRemotePointsA));
  /* compact in place: drop roots of sfB that received nothing (rank still -1) */
  for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA]        = i;
    numLeavesBA++;
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`

  Input Parameter:
. sf - The global `PetscSF`

  Output Parameter:
. out - The local `PetscSF`

.seealso: `PetscSF`, `PetscSFCreate()`
*/
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
{
  MPI_Comm           comm;
  PetscMPIInt        myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
  PetscSFNode       *liremote;
  PetscSF            lsf;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
  else {
    PetscMPIInt irank;

    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
    PetscCallMPI(MPI_Comm_rank(comm, &myrank));

    /* Find out local edges and build a local SF */
    PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
    /* first pass: count edges whose root lives on this rank */
    for (i = lnleaves = 0; i < nleaves; i++) {
      PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
      if (irank == myrank) lnleaves++;
    }
    PetscCall(PetscMalloc1(lnleaves, &lilocal));
    PetscCall(PetscMalloc1(lnleaves, &liremote));

    /* second pass: copy the local edges, remapping the root rank to 0 on PETSC_COMM_SELF */
    for (i = j = 0; i < nleaves; i++) {
      PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
      if (irank == myrank) {
        lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
    PetscCall(PetscSFSetFromOptions(lsf));
    PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(lsf));
    *out = lsf;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
{
  PetscMemType rootmtype, leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  /* logged under the regular BcastBegin event; there is no dedicated event for BcastToZero */
  PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscCall(PetscGetMemType(rootdata, &rootmtype));
  PetscCall(PetscGetMemType(leafdata, &leafmtype));
  PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
  PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFConcatenate - concatenate multiple `PetscSF` into one

  Input Parameters:
+ comm - the communicator
. nsfs - the number of input `PetscSF`
. sfs - the array of input `PetscSF`
. rootMode - the root mode specifying how roots are handled
- leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage

  Output Parameter:
. newsf - The resulting `PetscSF`

  Level: advanced

  Notes:
  The communicator of all `PetscSF`s in `sfs` must be comm.

  Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order.
2322 2323 The offsets in `leafOffsets` are added to the original leaf indices. 2324 2325 If all input SFs use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well. 2326 In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`. 2327 2328 If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s. 2329 In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs). 2330 2331 All root modes retain the essential connectivity condition. 2332 If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`. 2333 Parameter `rootMode` controls how the input root spaces are combined. 2334 For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode) 2335 and is also the same in the output `PetscSF`. 2336 For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined. 2337 `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally; 2338 roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input `PetscSF` and original local index, and renumbered contiguously. 2339 `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally; 2340 roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously; 2341 the original root ranks are ignored. 2342 For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, 2343 the output `PetscSF`'s root layout is such that the local number of roots is a sum of the input `PetscSF`'s local numbers of roots on each rank 2344 to keep the load balancing. 
2345 However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different ranks. 2346 2347 Example: 2348 We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running 2349 .vb 2350 make -C $PETSC_DIR/src/vec/is/sf/tests ex18 2351 for m in {local,global,shared}; do 2352 mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view 2353 done 2354 .ve 2355 we generate two identical `PetscSF`s sf_0 and sf_1, 2356 .vb 2357 PetscSF Object: sf_0 2 MPI processes 2358 type: basic 2359 rank #leaves #roots 2360 [ 0] 4 2 2361 [ 1] 4 2 2362 leaves roots roots in global numbering 2363 ( 0, 0) <- ( 0, 0) = 0 2364 ( 0, 1) <- ( 0, 1) = 1 2365 ( 0, 2) <- ( 1, 0) = 2 2366 ( 0, 3) <- ( 1, 1) = 3 2367 ( 1, 0) <- ( 0, 0) = 0 2368 ( 1, 1) <- ( 0, 1) = 1 2369 ( 1, 2) <- ( 1, 0) = 2 2370 ( 1, 3) <- ( 1, 1) = 3 2371 .ve 2372 and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\: 2373 .vb 2374 rootMode = local: 2375 PetscSF Object: result_sf 2 MPI processes 2376 type: basic 2377 rank #leaves #roots 2378 [ 0] 8 4 2379 [ 1] 8 4 2380 leaves roots roots in global numbering 2381 ( 0, 0) <- ( 0, 0) = 0 2382 ( 0, 1) <- ( 0, 1) = 1 2383 ( 0, 2) <- ( 1, 0) = 4 2384 ( 0, 3) <- ( 1, 1) = 5 2385 ( 0, 4) <- ( 0, 2) = 2 2386 ( 0, 5) <- ( 0, 3) = 3 2387 ( 0, 6) <- ( 1, 2) = 6 2388 ( 0, 7) <- ( 1, 3) = 7 2389 ( 1, 0) <- ( 0, 0) = 0 2390 ( 1, 1) <- ( 0, 1) = 1 2391 ( 1, 2) <- ( 1, 0) = 4 2392 ( 1, 3) <- ( 1, 1) = 5 2393 ( 1, 4) <- ( 0, 2) = 2 2394 ( 1, 5) <- ( 0, 3) = 3 2395 ( 1, 6) <- ( 1, 2) = 6 2396 ( 1, 7) <- ( 1, 3) = 7 2397 2398 rootMode = global: 2399 PetscSF Object: result_sf 2 MPI processes 2400 type: basic 2401 rank #leaves #roots 2402 [ 0] 8 4 2403 [ 1] 8 4 2404 leaves roots roots in global numbering 2405 ( 0, 0) <- ( 0, 0) = 0 2406 ( 0, 1) <- ( 0, 1) = 1 2407 ( 0, 2) <- ( 0, 2) = 2 2408 ( 0, 3) <- ( 0, 3) = 3 2409 ( 0, 4) <- ( 1, 0) = 4 2410 ( 0, 5) <- ( 1, 1) = 5 2411 ( 0, 6) <- ( 1, 2) = 6 
2412 ( 0, 7) <- ( 1, 3) = 7 2413 ( 1, 0) <- ( 0, 0) = 0 2414 ( 1, 1) <- ( 0, 1) = 1 2415 ( 1, 2) <- ( 0, 2) = 2 2416 ( 1, 3) <- ( 0, 3) = 3 2417 ( 1, 4) <- ( 1, 0) = 4 2418 ( 1, 5) <- ( 1, 1) = 5 2419 ( 1, 6) <- ( 1, 2) = 6 2420 ( 1, 7) <- ( 1, 3) = 7 2421 2422 rootMode = shared: 2423 PetscSF Object: result_sf 2 MPI processes 2424 type: basic 2425 rank #leaves #roots 2426 [ 0] 8 2 2427 [ 1] 8 2 2428 leaves roots roots in global numbering 2429 ( 0, 0) <- ( 0, 0) = 0 2430 ( 0, 1) <- ( 0, 1) = 1 2431 ( 0, 2) <- ( 1, 0) = 2 2432 ( 0, 3) <- ( 1, 1) = 3 2433 ( 0, 4) <- ( 0, 0) = 0 2434 ( 0, 5) <- ( 0, 1) = 1 2435 ( 0, 6) <- ( 1, 0) = 2 2436 ( 0, 7) <- ( 1, 1) = 3 2437 ( 1, 0) <- ( 0, 0) = 0 2438 ( 1, 1) <- ( 0, 1) = 1 2439 ( 1, 2) <- ( 1, 0) = 2 2440 ( 1, 3) <- ( 1, 1) = 3 2441 ( 1, 4) <- ( 0, 0) = 0 2442 ( 1, 5) <- ( 0, 1) = 1 2443 ( 1, 6) <- ( 1, 0) = 2 2444 ( 1, 7) <- ( 1, 1) = 3 2445 .ve 2446 2447 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode` 2448 @*/ 2449 PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf) 2450 { 2451 PetscInt i, s, nLeaves, nRoots; 2452 PetscInt *leafArrayOffsets; 2453 PetscInt *ilocal_new; 2454 PetscSFNode *iremote_new; 2455 PetscBool all_ilocal_null = PETSC_FALSE; 2456 PetscLayout glayout = NULL; 2457 PetscInt *gremote = NULL; 2458 PetscMPIInt rank, size; 2459 2460 PetscFunctionBegin; 2461 if (PetscDefined(USE_DEBUG)) { 2462 PetscSF dummy; /* just to have a PetscObject on comm for input validation */ 2463 2464 PetscCall(PetscSFCreate(comm, &dummy)); 2465 PetscValidLogicalCollectiveInt(dummy, nsfs, 2); 2466 PetscAssertPointer(sfs, 3); 2467 for (i = 0; i < nsfs; i++) { 2468 PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3); 2469 PetscCheckSameComm(dummy, 1, sfs[i], 3); 2470 } 2471 PetscValidLogicalCollectiveEnum(dummy, rootMode, 4); 2472 if (leafOffsets) 
PetscAssertPointer(leafOffsets, 5); 2473 PetscAssertPointer(newsf, 6); 2474 PetscCall(PetscSFDestroy(&dummy)); 2475 } 2476 if (!nsfs) { 2477 PetscCall(PetscSFCreate(comm, newsf)); 2478 PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 2479 PetscFunctionReturn(PETSC_SUCCESS); 2480 } 2481 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2482 PetscCallMPI(MPI_Comm_size(comm, &size)); 2483 2484 /* Calculate leaf array offsets */ 2485 PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets)); 2486 leafArrayOffsets[0] = 0; 2487 for (s = 0; s < nsfs; s++) { 2488 PetscInt nl; 2489 2490 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL)); 2491 leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl; 2492 } 2493 nLeaves = leafArrayOffsets[nsfs]; 2494 2495 /* Calculate number of roots */ 2496 switch (rootMode) { 2497 case PETSCSF_CONCATENATE_ROOTMODE_SHARED: { 2498 PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL)); 2499 if (PetscDefined(USE_DEBUG)) { 2500 for (s = 1; s < nsfs; s++) { 2501 PetscInt nr; 2502 2503 PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL)); 2504 PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots); 2505 } 2506 } 2507 } break; 2508 case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: { 2509 /* Calculate also global layout in this case */ 2510 PetscInt *nls; 2511 PetscLayout *lts; 2512 PetscInt **inds; 2513 PetscInt j; 2514 PetscInt rootOffset = 0; 2515 2516 PetscCall(PetscCalloc3(nsfs, <s, nsfs, &nls, nsfs, &inds)); 2517 PetscCall(PetscLayoutCreate(comm, &glayout)); 2518 glayout->bs = 1; 2519 glayout->n = 0; 2520 glayout->N = 0; 2521 for (s = 0; s < nsfs; s++) { 2522 PetscCall(PetscSFGetGraphLayout(sfs[s], <s[s], &nls[s], NULL, &inds[s])); 2523 glayout->n += lts[s]->n; 2524 glayout->N += lts[s]->N; 2525 } 2526 
PetscCall(PetscLayoutSetUp(glayout)); 2527 PetscCall(PetscMalloc1(nLeaves, &gremote)); 2528 for (s = 0, j = 0; s < nsfs; s++) { 2529 for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset; 2530 rootOffset += lts[s]->N; 2531 PetscCall(PetscLayoutDestroy(<s[s])); 2532 PetscCall(PetscFree(inds[s])); 2533 } 2534 PetscCall(PetscFree3(lts, nls, inds)); 2535 nRoots = glayout->N; 2536 } break; 2537 case PETSCSF_CONCATENATE_ROOTMODE_LOCAL: 2538 /* nRoots calculated later in this case */ 2539 break; 2540 default: 2541 SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode); 2542 } 2543 2544 if (!leafOffsets) { 2545 all_ilocal_null = PETSC_TRUE; 2546 for (s = 0; s < nsfs; s++) { 2547 const PetscInt *ilocal; 2548 2549 PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL)); 2550 if (ilocal) { 2551 all_ilocal_null = PETSC_FALSE; 2552 break; 2553 } 2554 } 2555 PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL"); 2556 } 2557 2558 /* Renumber and concatenate local leaves */ 2559 ilocal_new = NULL; 2560 if (!all_ilocal_null) { 2561 PetscCall(PetscMalloc1(nLeaves, &ilocal_new)); 2562 for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1; 2563 for (s = 0; s < nsfs; s++) { 2564 const PetscInt *ilocal; 2565 PetscInt *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]); 2566 PetscInt i, nleaves_l; 2567 2568 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL)); 2569 for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? 
ilocal[i] : i) + leafOffsets[s]; 2570 } 2571 } 2572 2573 /* Renumber and concatenate remote roots */ 2574 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) { 2575 PetscInt rootOffset = 0; 2576 2577 PetscCall(PetscMalloc1(nLeaves, &iremote_new)); 2578 for (i = 0; i < nLeaves; i++) { 2579 iremote_new[i].rank = -1; 2580 iremote_new[i].index = -1; 2581 } 2582 for (s = 0; s < nsfs; s++) { 2583 PetscInt i, nl, nr; 2584 PetscSF tmp_sf; 2585 const PetscSFNode *iremote; 2586 PetscSFNode *tmp_rootdata; 2587 PetscSFNode *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]); 2588 2589 PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote)); 2590 PetscCall(PetscSFCreate(comm, &tmp_sf)); 2591 /* create helper SF with contiguous leaves */ 2592 PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 2593 PetscCall(PetscSFSetUp(tmp_sf)); 2594 PetscCall(PetscMalloc1(nr, &tmp_rootdata)); 2595 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) { 2596 for (i = 0; i < nr; i++) { 2597 tmp_rootdata[i].index = i + rootOffset; 2598 tmp_rootdata[i].rank = rank; 2599 } 2600 rootOffset += nr; 2601 } else { 2602 for (i = 0; i < nr; i++) { 2603 tmp_rootdata[i].index = i; 2604 tmp_rootdata[i].rank = rank; 2605 } 2606 } 2607 PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2608 PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2609 PetscCall(PetscSFDestroy(&tmp_sf)); 2610 PetscCall(PetscFree(tmp_rootdata)); 2611 } 2612 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above 2613 2614 /* Build the new SF */ 2615 PetscCall(PetscSFCreate(comm, newsf)); 2616 PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER)); 2617 } else { 2618 /* Build the new SF */ 2619 
PetscCall(PetscSFCreate(comm, newsf)); 2620 PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote)); 2621 } 2622 PetscCall(PetscSFSetUp(*newsf)); 2623 PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view")); 2624 PetscCall(PetscLayoutDestroy(&glayout)); 2625 PetscCall(PetscFree(gremote)); 2626 PetscCall(PetscFree(leafArrayOffsets)); 2627 PetscFunctionReturn(PETSC_SUCCESS); 2628 } 2629 2630 /*@ 2631 PetscSFRegisterPersistent - Register root and leaf data as memory regions that will be used for repeated PetscSF communications. 2632 2633 Collective 2634 2635 Input Parameters: 2636 + sf - star forest 2637 . unit - the data type contained within the root and leaf data 2638 . rootdata - root data that will be used for multiple PetscSF communications 2639 - leafdata - leaf data that will be used for multiple PetscSF communications 2640 2641 Level: advanced 2642 2643 Notes: 2644 Implementations of `PetscSF` can make optimizations 2645 for repeated communication using the same memory regions, but these optimizations 2646 can be unsound if `rootdata` or `leafdata` is deallocated and the `PetscSF` is not informed. 2647 The intended pattern is 2648 2649 .vb 2650 PetscMalloc2(nroots, &rootdata, nleaves, &leafdata); 2651 2652 PetscSFRegisterPersistent(sf, unit, rootdata, leafdata); 2653 // repeated use of rootdata and leafdata will now be optimized 2654 2655 PetscSFBcastBegin(sf, unit, rootdata, leafdata, MPI_REPLACE); 2656 PetscSFBcastEnd(sf, unit, rootdata, leafdata, MPI_REPLACE); 2657 // ... 2658 PetscSFReduceBegin(sf, unit, leafdata, rootdata, MPI_SUM); 2659 PetscSFReduceEnd(sf, unit, leafdata, rootdata, MPI_SUM); 2660 // ... 
(other communications)

  // rootdata and leafdata must be deregistered before freeing
  // skipping this can lead to undefined behavior including
  // deadlocks
  PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata);

  // it is now safe to free rootdata and leafdata
  PetscFree2(rootdata, leafdata);
.ve

  If you do not register `rootdata` and `leafdata` it will not cause an error,
  but optimizations that reduce the setup time for each communication cannot be
  made. Currently, the only implementation of `PetscSF` that benefits from
  `PetscSFRegisterPersistent()` is `PETSCSFWINDOW`. For the default
  `PETSCSFBASIC` there is no benefit to using `PetscSFRegisterPersistent()`.

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFDeregisterPersistent()`
@*/
PetscErrorCode PetscSFRegisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* PetscTryMethod: dispatch to the implementation's registration hook if it provides one; a no-op otherwise */
  PetscTryMethod(sf, "PetscSFRegisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFDeregisterPersistent - Signal that repeated usage of root and leaf data for PetscSF communication has concluded.

  Collective

  Input Parameters:
+ sf       - star forest
. unit     - the data type contained within the root and leaf data
. rootdata - root data that was previously registered with `PetscSFRegisterPersistent()`
- leafdata - leaf data that was previously registered with `PetscSFRegisterPersistent()`

  Level: advanced

  Note:
  See `PetscSFRegisterPersistent()` for when/how to use this function.

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFRegisterPersistent()`
@*/
PetscErrorCode PetscSFDeregisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* PetscTryMethod: dispatch to the implementation's deregistration hook if it provides one; a no-op otherwise */
  PetscTryMethod(sf, "PetscSFDeregisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Query the extent (size in bytes) of an MPI datatype, rejecting types with a nonzero lower bound.
   Errors (on comm) if the type's lower bound or true lower bound is nonzero, since such units are unsupported. */
PETSC_INTERN PetscErrorCode PetscSFGetDatatypeSize_Internal(MPI_Comm comm, MPI_Datatype unit, MPI_Aint *size)
{
  MPI_Aint lb, lb_true, bytes, bytes_true;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes));
  PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true));
  PetscCheck(lb == 0 && lb_true == 0, comm, PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
  /* Return the (possibly padded) extent, not the true extent, as the per-unit size */
  *size = bytes;
  PetscFunctionReturn(PETSC_SUCCESS);
}