#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/hashseti.h>
#include <petsc/private/viewerimpl.h>
#include <petsc/private/hashmapi.h>

#if defined(PETSC_HAVE_CUDA)
  #include <cuda_runtime.h>
  #include <petscdevice_cuda.h>
#endif

#if defined(PETSC_HAVE_HIP)
  #include <hip/hip_runtime.h>
#endif

#if defined(PETSC_CLANG_STATIC_ANALYZER)
extern void PetscSFCheckGraphSet(PetscSF, int);
#else
  #if defined(PETSC_USE_DEBUG)
    /* Debug-only guard: error out if the SF graph has not been set before an operation that needs it */
    #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME)
  #else
    #define PetscSFCheckGraphSet(sf, arg) \
      do { \
      } while (0)
  #endif
#endif

const char *const PetscSFDuplicateOptions[]     = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL};

/*@
  PetscSFCreate - create a star forest communication context

  Collective

  Input Parameter:
. comm - communicator on which the star forest will operate

  Output Parameter:
. sf - new star forest context

  Options Database Key:
+ -sf_type basic                 - Use MPI persistent Isend/Irecv for communication (Default)
. -sf_type window                - Use MPI-3 one-sided window for communication
. -sf_type neighbor              - Use MPI-3 neighborhood collectives for communication
- -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor)

  Level: intermediate

  Note:
  When one knows the communication graph is one of the predefined graphs, such as `MPI_Alltoall()`, `MPI_Allgatherv()`,
  `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
  `SF`s are optimized and they have better performance than the general `SF`s.

.seealso: `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
{
  PetscSF b;

  PetscFunctionBegin;
  PetscAssertPointer(sf, 2);
  PetscCall(PetscSFInitializePackage());

  PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));

  /* Sentinel values: negative sizes / inverted leaf range mean "graph not set yet" */
  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_MAX_INT;
  b->maxleaf   = PETSC_MIN_INT;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;
#if defined(PETSC_HAVE_DEVICE)
  b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
  b->use_stream_aware_mpi = PETSC_FALSE;
  b->unknown_input_stream = PETSC_FALSE;
  #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda */
  b->backend = PETSCSF_BACKEND_KOKKOS;
  #elif defined(PETSC_HAVE_CUDA)
  b->backend = PETSCSF_BACKEND_CUDA;
  #elif defined(PETSC_HAVE_HIP)
  b->backend = PETSCSF_BACKEND_HIP;
  #endif

  #if defined(PETSC_HAVE_NVSHMEM)
  b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
  b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
  #endif
#endif
  b->vscat.from_n = -1;
  b->vscat.to_n   = -1;
  b->vscat.unit   = MPIU_SCALAR;
  *sf             = b;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

  Collective

  Input Parameter:
. sf - star forest

  Level: advanced

.seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscTryTypeMethod(sf, Reset);
  /* Return all graph-describing fields to their "not set" sentinels (see PetscSFCreate()) */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  PetscCall(PetscFree(sf->mine_alloc));
  PetscCall(PetscFree(sf->remote_alloc));
  sf->nranks = -1;
  PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
  sf->degreeknown = PETSC_FALSE;
  PetscCall(PetscFree(sf->degree));
  if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
  if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
  if (sf->multi) sf->multi->multi = NULL; /* detach the multi-SF's pointer back to us before destroying it */
  PetscCall(PetscSFDestroy(&sf->multi));
  PetscCall(PetscLayoutDestroy(&sf->map));

#if defined(PETSC_HAVE_DEVICE)
  for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
#endif

  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFSetType - Set the `PetscSF` communication implementation

  Collective

  Input Parameters:
+ sf   - the `PetscSF` context
- type - a known method
.vb
    PETSCSFWINDOW - MPI-2/3 one-sided
    PETSCSFBASIC  - basic implementation using MPI-1 two-sided
.ve

  Options Database Key:
. -sf_type <type> - Sets the method; for example `basic` or `window` use -help for a list of available methods

  Level: intermediate

  Notes:
  See `PetscSFType` for possible values

.seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
{
  PetscBool match;
  PetscErrorCode (*r)(PetscSF);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscAssertPointer(type, 2);

  /* No-op if the SF already has the requested type */
  PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
  if (match) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
  PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
  /* Destroy the previous PetscSF implementation context */
  PetscTryTypeMethod(sf, Destroy);
  PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
  PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
  PetscCall((*r)(sf)); /* constructor registered for the new type fills in sf->ops */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGetType - Get the `PetscSF` communication implementation

  Not Collective

  Input Parameter:
. sf - the `PetscSF` context

  Output Parameter:
. type - the `PetscSF` type name

  Level: intermediate

.seealso: `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscAssertPointer(type, 2);
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFDestroy - destroy a star forest

  Collective

  Input Parameter:
. sf - address of star forest

  Level: intermediate

.seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(PETSC_SUCCESS);
  PetscValidHeaderSpecific((*sf), PETSCSF_CLASSID, 1);
  /* Reference counting: only tear down when the last reference is released */
  if (--((PetscObject)(*sf))->refct > 0) {
    *sf = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscSFReset(*sf));
  PetscTryTypeMethod((*sf), Destroy);
  PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
  /* The unit datatype was created with MPI_Type_dup()/contiguous only when bs > 1 (see PetscSFDuplicate()) */
  if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
#if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
  if ((*sf)->use_stream_aware_mpi) {
    PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream));
    PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm));
  }
#endif
  PetscCall(PetscHeaderDestroy(sf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Debug-mode validation of the graph set by PetscSFSetGraph(): every remote rank must be
   inside the communicator, and every remote index and leaf location must be nonnegative. */
static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
{
  PetscInt           i, nleaves;
  PetscMPIInt        size;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;

  PetscFunctionBegin;
  if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
  for (i = 0; i < nleaves; i++) {
    const PetscInt rank   = iremote[i].rank;
    const PetscInt remote = iremote[i].index;
    const PetscInt leaf   = ilocal ? ilocal[i] : i; /* NULL ilocal means contiguous leaf storage */
    PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
    PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
    PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication

  Collective

  Input Parameter:
. sf - star forest communication object

  Level: beginner

.seealso: `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
@*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS); /* setup is idempotent */
  PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
  PetscCall(PetscSFCheckGraphValid_Private(sf));
  if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
  PetscTryTypeMethod(sf, SetUp);
  /* Install device allocators matching the selected backend (chosen in PetscSFCreate()/SetFromOptions()) */
#if defined(PETSC_HAVE_CUDA)
  if (sf->backend == PETSCSF_BACKEND_CUDA) {
    sf->ops->Malloc = PetscSFMalloc_CUDA;
    sf->ops->Free   = PetscSFFree_CUDA;
  }
#endif
#if defined(PETSC_HAVE_HIP)
  if (sf->backend == PETSCSF_BACKEND_HIP) {
    sf->ops->Malloc = PetscSFMalloc_HIP;
    sf->ops->Free   = PetscSFFree_HIP;
  }
#endif

#if defined(PETSC_HAVE_KOKKOS)
  if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
    sf->ops->Malloc = PetscSFMalloc_Kokkos;
    sf->ops->Free   = PetscSFFree_Kokkos;
  }
#endif
  PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFSetFromOptions - set `PetscSF` options using the options database

  Logically Collective

  Input Parameter:
. sf - star forest

  Options Database Keys:
+ -sf_type                         - implementation type, see `PetscSFSetType()`
. -sf_rank_order                   - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
. -sf_use_default_stream           - Assume callers of `PetscSF` computed the input root/leafdata with the default CUDA stream. `PetscSF` will also
                                     use the default stream to process data. Therefore, no stream synchronization is needed between `PetscSF` and its caller (default: true).
                                     If true, this option only works with `-use_gpu_aware_mpi 1`.
. -sf_use_stream_aware_mpi         - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false).
                                     If true, this option only works with `-use_gpu_aware_mpi 1`.
- -sf_backend cuda | hip | kokkos  - Select the device backend SF uses. Currently `PetscSF` has these backends: cuda - hip and Kokkos.
                                     On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
                                     the only available is kokkos.

  Level: intermediate

.seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType deft;
  char        type[256];
  PetscBool   flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscObjectOptionsBegin((PetscObject)sf);
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
  PetscCall(PetscSFSetType(sf, flg ? type : deft));
  PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
#if defined(PETSC_HAVE_DEVICE)
  {
    char      backendstr[32] = {0};
    PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
    /* Change the defaults set in PetscSFCreate() with command line options */
    PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
    PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
    PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
    PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
    PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
    PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
    if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
    else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
    else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
    else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
#elif defined(PETSC_HAVE_KOKKOS)
    /* Kokkos-only build: the option may be set only to kokkos */
    PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
#endif

#if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
    if (sf->use_stream_aware_mpi) {
      MPI_Info info;

      /* Bind PETSc's default CUDA stream to an MPIX stream so MPI can order its operations on it */
      PetscCallMPI(MPI_Info_create(&info));
      PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t"));
      PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream)));
      PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream));
      PetscCallMPI(MPI_Info_free(&info));
      PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm));
    }
#endif
  }
#endif
  PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

  Logically Collective

  Input Parameters:
+ sf  - star forest
- flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic)

  Level: advanced

.seealso: `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveBool(sf, flg, 2);
  /* The multi-SF is built lazily by gather/scatter; once it exists the ordering cannot change */
  PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFSetGraph - Set a parallel star forest

  Collective

  Input Parameters:
+ sf     - star forest
. nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
. nleaves     - number of leaf vertices on the current process, each of these references a root on any process
. ilocal      - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage (locations must be >= 0, enforced
                during setup in debug mode)
. localmode   - copy mode for `ilocal`
. iremote     - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
                during setup in debug mode)
- remotemode  - copy mode for `iremote`

  Level: intermediate

  Notes:
  Leaf indices in `ilocal` must be unique, otherwise an error occurs.

  Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics.
  In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
  PETSc might modify the respective array;
  if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`.
  Only if `PETSC_COPY_VALUES` is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).

  Fortran Notes:
  In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`.

  Developer Notes:
  We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
  This also allows to compare leaf sets of two `PetscSF`s easily.

.seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode)
{
  PetscBool unique, contiguous;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4);
  if (nleaves > 0) PetscAssertPointer(iremote, 6);
  PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
  PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
  /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
   * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
  PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
  PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    PetscCall(PetscSFReset(sf));
  }

  PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  /* With PETSC_COPY_VALUES, work on private copies so the caller's arrays stay intact */
  if (localmode == PETSC_COPY_VALUES && ilocal) {
    PetscInt *tlocal = NULL;

    PetscCall(PetscMalloc1(nleaves, &tlocal));
    PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
    ilocal = tlocal;
  }
  if (remotemode == PETSC_COPY_VALUES) {
    PetscSFNode *tremote = NULL;

    PetscCall(PetscMalloc1(nleaves, &tremote));
    PetscCall(PetscArraycpy(tremote, iremote, nleaves));
    iremote = tremote;
  }

  if (nleaves && ilocal) {
    PetscSFNode work;

    /* Sort leaves (dragging iremote along) to detect duplicates/contiguity and find the leaf range */
    PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
    PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
    unique = PetscNot(unique);
    PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
    sf->minleaf = ilocal[0];
    sf->maxleaf = ilocal[nleaves - 1];
    contiguous  = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
  } else {
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
    unique      = PETSC_TRUE;
    contiguous  = PETSC_TRUE;
  }

  /* A contiguous 0..nleaves-1 leaf set is represented by a NULL ilocal */
  if (contiguous) {
    if (localmode == PETSC_USE_POINTER) {
      ilocal = NULL;
    } else {
      PetscCall(PetscFree(ilocal));
    }
  }
  sf->mine = ilocal;
  if (localmode == PETSC_USE_POINTER) {
    sf->mine_alloc = NULL; /* caller keeps ownership; PetscSFReset() must not free it */
  } else {
    sf->mine_alloc = ilocal;
  }
  sf->remote = iremote;
  if (remotemode == PETSC_USE_POINTER) {
    sf->remote_alloc = NULL;
  } else {
    sf->remote_alloc = iremote;
  }
  PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern

  Collective

  Input Parameters:
+ sf      - The `PetscSF`
. map     - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`)
- pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`

  Level: intermediate

  Notes:
  It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `x` and its `PetscLayout` is `map`.
  `n` and `N` are the local and global sizes of `x` respectively.
  With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to
  sequential vectors `y` on all MPI processes.

  With `PETSCSF_PATTERN_GATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to a
  sequential vector `y` on rank 0.

  In above cases, entries of `x` are roots and entries of `y` are leaves.

  With `PETSCSF_PATTERN_ALLTOALL`, map is insignificant. Suppose NP is size of `sf`'s communicator. The routine
  creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
  of rank j. Here 0 <=i,j<NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that it does
  not mean one can not send multiple items. One just needs to create a new MPI datatype for the multiple data
  items with `MPI_Type_contiguous` and use that as the <unit> argument in SF routines.

  In this case, roots and leaves are symmetric.

.seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
{
  MPI_Comm    comm;
  PetscInt    n, N, res[2];
  PetscMPIInt rank, size;
  PetscSFType type;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2);
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* The caller's map is ignored; build a size-per-rank layout for the alltoall graph */
    PetscCall(PetscLayoutCreate(comm, &sf->map));
    PetscCall(PetscLayoutSetLocalSize(sf->map, size));
    PetscCall(PetscLayoutSetSize(sf->map, ((PetscInt)size) * size));
    PetscCall(PetscLayoutSetUp(sf->map));
  } else {
    PetscCall(PetscLayoutGetLocalSize(map, &n));
    PetscCall(PetscLayoutGetSize(map, &N));
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    PetscCall(PetscLayoutReference(map, &sf->map));
  }
  PetscCall(PetscSFSetType(sf, type));

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots  = n;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    sf->nleaves = rank ? 0 : N; /* only rank 0 has leaves */
    sf->nroots  = n;
    sf->nranks  = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves = size;
    sf->nroots  = size;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map

  Collective

  Input Parameter:
. sf - star forest to invert

  Output Parameter:
. isf - inverse of `sf`

  Level: advanced

  Notes:
  All roots must have degree 1.

  The local space may be a permutation, but cannot be sparse.

.seealso: `PetscSF`, `PetscSFType`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
{
  PetscMPIInt     rank;
  PetscInt        i, nroots, nleaves, maxlocal, count, *newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots, *leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscAssertPointer(isf, 2);

  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
  maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
  PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
  /* Each leaf advertises its own (rank, index); reducing this to the roots tells
     every root which single leaf points at it (all roots have degree 1) */
  for (i = 0; i < maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* Mark roots with no incoming leaf with rank -1 so they can be detected below */
  for (i = 0; i < nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  PetscCall(PetscSFReduceBegin(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));

  /* Check whether our leaves are sparse */
  for (i = 0, count = 0; i < nroots; i++)
    if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL;
  else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    PetscCall(PetscMalloc1(count, &newilocal));
    for (i = 0, count = 0; i < nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
  PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
  PetscCall(PetscFree2(roots, leaves));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph

  Collective

  Input Parameters:
+ sf  - communication object to duplicate
- opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)

  Output Parameter:
. newsf - new communication object

  Level: beginner

.seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
{
  PetscSFType  type;
  MPI_Datatype dtype = MPIU_SCALAR;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveEnum(sf, opt, 2);
  PetscAssertPointer(newsf, 3);
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
  PetscCall(PetscSFGetType(sf, &type));
  if (type) PetscCall(PetscSFSetType(*newsf, type));
  (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf, 1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt           nroots, nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
      PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
    } else {
      PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
    }
  }
  /* Since oldtype is committed, so is newtype, according to MPI */
  if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
  (*newsf)->vscat.bs     = sf->vscat.bs;
  (*newsf)->vscat.unit   = dtype;
  (*newsf)->vscat.to_n   = sf->vscat.to_n;
  (*newsf)->vscat.from_n = sf->vscat.from_n;
  /* Do not copy lsf. Build it on demand since it is rarely used */

#if defined(PETSC_HAVE_DEVICE)
  (*newsf)->backend              = sf->backend;
  (*newsf)->unknown_input_stream = sf->unknown_input_stream;
  (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGetGraph - Get the graph specifying a parallel star forest

  Not Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ nroots  - number of root vertices on the current process (these are possible targets for other process to attach leaves)
. nleaves - number of leaf vertices on the current process, each of these references a root on any process
. ilocal  - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage)
- iremote - remote locations of root vertices for each leaf on the current process

  Level: intermediate

  Notes:
  We are not currently requiring that the graph is set, thus returning `nroots` = -1 if it has not been set yet

  The returned `ilocal` and `iremote` might contain values in different order than the input ones in `PetscSFSetGraph()`

  Fortran Notes:
  The returned `iremote` array is a copy and must be deallocated after use. Consequently, if you
  want to update the graph, you must call `PetscSFSetGraph()` after modifying the `iremote` array.

  To check for a `NULL` `ilocal` use
$ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then

.seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* Patterned SFs (alltoall, gather, ...) synthesize the graph via their GetGraph method */
  if (sf->ops->GetGraph) {
    PetscCall((sf->ops->GetGraph)(sf, nroots, nleaves, ilocal, iremote));
  } else {
    if (nroots) *nroots = sf->nroots;
    if (nleaves) *nleaves = sf->nleaves;
    if (ilocal) *ilocal = sf->mine;
    if (iremote) *iremote = sf->remote;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFGetLeafRange - Get the active leaf ranges

  Not Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
- maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.
  Level: developer

.seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  /* Both outputs are optional; pass NULL for any value you do not need */
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database

  Collective

  Input Parameters:
+ A - the star forest
. obj - Optional object that provides the prefix for the option names
- name - command line option

  Level: intermediate

  Note:
  See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat`

.seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1);
  PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFView - view a star forest

  Collective

  Input Parameters:
+ sf - star forest
- viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`

  Level: beginner

.seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
{
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  PetscCheckSameComm(sf, 1, viewer, 2);
  /* Setting up (only possible once the graph is known) builds the rank/offset arrays printed below */
  if (sf->graphset) PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  /* Generic ASCII output is skipped for the MATLAB format; the type's own View method below still runs */
  if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt    ii, i, j;

    PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
    PetscCall(PetscViewerASCIIPushTab(viewer));
    /* Pattern SFs (alltoall/allgather/gather) have no explicit edge list to print */
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
        PetscCall(PetscViewerASCIIPopTab(viewer));
        PetscFunctionReturn(PETSC_SUCCESS);
      }
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n", rank, sf->nroots, sf->nleaves, sf->nranks));
      /* NULL sf->mine means leaves are stored contiguously, so leaf i lives at index i */
      for (i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerGetFormat(viewer, &format));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        /* Detail mode: also dump per-rank edge lists, sorted by remote rank for readability */
        PetscMPIInt *tmpranks, *perm;
        PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
        PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
        for (i = 0; i < sf->nranks; i++) perm[i] = i;
        PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
        for (ii = 0; ii < sf->nranks; ii++) {
          i = perm[ii];
          PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
          for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
        }
        PetscCall(PetscFree2(tmpranks, perm));
      }
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
    }
    PetscCall(PetscViewerASCIIPopTab(viewer));
  }
  PetscTryTypeMethod(sf, View, viewer);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

  Not Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ nranks - number of ranks referenced by local part
. ranks - [`nranks`] array of ranks
. roffset - [`nranks`+1] offset in `rmine`/`rremote` for each rank
.
rmine - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote rank 938 - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote rank 939 940 Level: developer 941 942 .seealso: `PetscSF`, `PetscSFGetLeafRanks()` 943 @*/ 944 PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote) 945 { 946 PetscFunctionBegin; 947 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 948 PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 949 if (sf->ops->GetRootRanks) { 950 PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote); 951 } else { 952 /* The generic implementation */ 953 if (nranks) *nranks = sf->nranks; 954 if (ranks) *ranks = sf->ranks; 955 if (roffset) *roffset = sf->roffset; 956 if (rmine) *rmine = sf->rmine; 957 if (rremote) *rremote = sf->rremote; 958 } 959 PetscFunctionReturn(PETSC_SUCCESS); 960 } 961 962 /*@C 963 PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 964 965 Not Collective 966 967 Input Parameter: 968 . sf - star forest 969 970 Output Parameters: 971 + niranks - number of leaf ranks referencing roots on this process 972 . iranks - [`niranks`] array of ranks 973 . 
ioffset - [`niranks`+1] offset in `irootloc` for each rank 974 - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf rank 975 976 Level: developer 977 978 .seealso: `PetscSF`, `PetscSFGetRootRanks()` 979 @*/ 980 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc) 981 { 982 PetscFunctionBegin; 983 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 984 PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 985 if (sf->ops->GetLeafRanks) { 986 PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc); 987 } else { 988 PetscSFType type; 989 PetscCall(PetscSFGetType(sf, &type)); 990 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 991 } 992 PetscFunctionReturn(PETSC_SUCCESS); 993 } 994 995 static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list) 996 { 997 PetscInt i; 998 for (i = 0; i < n; i++) { 999 if (needle == list[i]) return PETSC_TRUE; 1000 } 1001 return PETSC_FALSE; 1002 } 1003 1004 /*@C 1005 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations. 
  Collective

  Input Parameters:
+ sf - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
- dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)

  Level: developer

.seealso: `PetscSF`, `PetscSFGetRootRanks()`
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
{
  PetscHMapI    table;
  PetscHashIter pos;
  PetscMPIInt   size, groupsize, *groupranks;
  PetscInt     *rcount, *ranks;
  PetscInt      i, irank = -1, orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
  /* Count leaves per referenced remote rank with a hash map */
  PetscCall(PetscHMapICreateWithSize(10, &table));
  for (i = 0; i < sf->nleaves; i++) {
    /* Log 1-based rank */
    PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
  }
  PetscCall(PetscHMapIGetSize(table, &sf->nranks));
  PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
  PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
  /* Flatten the hash map into parallel arrays: ranks[] and their leaf counts rcount[] */
  PetscHashIterBegin(table, pos);
  for (i = 0; i < sf->nranks; i++) {
    PetscHashIterGetKey(table, pos, ranks[i]);
    PetscHashIterGetVal(table, pos, rcount[i]);
    PetscHashIterNext(table, pos);
    ranks[i]--; /* Convert back to 0-based */
  }
  PetscCall(PetscHMapIDestroy(&table));

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group    group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate the distinguished group's members into ranks of the SF's communicator */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
    PetscCall(PetscMalloc1(groupsize, &dgroupranks));
    PetscCall(PetscMalloc1(groupsize, &groupranks));
    for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
    if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree(dgroupranks));
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
    for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i], groupsize, groupranks)) break;
    }
    for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank, tmpcount;

      tmprank = ranks[i];
      tmpcount = rcount[i];
      ranks[i] = ranks[sf->ndranks];
      rcount[i] = rcount[sf->ndranks];
      ranks[sf->ndranks] = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  PetscCall(PetscFree(groupranks));
  /* Sort each partition by rank; the rcount guard covers nranks == 0 (size-0 allocation yields NULL) */
  PetscCall(PetscSortIntWithArray(sf->ndranks, ranks, rcount));
  if (rcount) PetscCall(PetscSortIntWithArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
  /* Prefix-sum the counts into roffset[] and reset rcount[] for reuse as a per-rank fill cursor */
  sf->roffset[0] = 0;
  for (i = 0; i < sf->nranks; i++) {
    PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
    sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
    rcount[i] = 0;
  }
  /* Bucket every leaf (local index rmine, remote index rremote) under its remote rank */
  for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->ndranks, sf->ranks, &irank));
      if (irank < 0) {
        PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %" PetscInt_FMT " in array", sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  PetscCall(PetscFree2(rcount, ranks));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGetGroups - gets incoming and outgoing process groups

  Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ incoming - group of origin processes for incoming edges (leaves that reference my roots)
- outgoing - group of destination processes for outgoing edges (roots that I reference)

  Level: developer

.seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
{
  MPI_Group group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
  /* Groups are built lazily and cached on the SF; they are destroyed with the SF */
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt        i;
    const PetscInt *indegree;
    PetscMPIInt     rank, *outranks, *inranks;
    PetscSFNode    *remote;
    PetscSF         bgcount;

    /* Compute the number of incoming ranks: an auxiliary SF with one root per process and
       one leaf per referenced rank, so the root's in-degree counts the ranks referencing me */
    PetscCall(PetscMalloc1(sf->nranks, &remote));
    for (i = 0; i < sf->nranks; i++) {
      remote[i].rank = sf->ranks[i];
      remote[i].index = 0;
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
    PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
    PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
    PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
    /* Enumerate the incoming ranks */
    PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
    for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
    PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
    PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_incl(group, indegree[0], inranks, &sf->ingroup));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree2(inranks, outranks));
    PetscCall(PetscSFDestroy(&bgcount));
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* The outgoing group is simply the referenced ranks, already known from setup */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
    PetscCallMPI(MPI_Group_free(&group));
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters

  Collective

  Input Parameter:
. sf - star forest that may contain roots with 0 or with more than 1 vertex

  Output Parameter:
. multi - star forest with split roots, such that each root has degree exactly 1

  Level: developer

  Note:
  In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
  directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
  edge, it is a candidate for future optimization that might involve its removal.

.seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscAssertPointer(multi, 2);
  if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
    *multi = sf->multi;
    sf->multi->multi = sf->multi; /* the multi-SF is its own multi-SF */
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!sf->multi) { /* Build and cache the multi-SF on first request */
    const PetscInt *indegree;
    PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
    PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
    PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
    /* inoffset[] is the prefix sum of root degrees: each root i owns multi-root slots
       [inoffset[i], inoffset[i+1]) */
    PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
    inoffset[0] = 0;
    for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
    for (i = 0; i < maxlocal; i++) outones[i] = 1;
    /* Fetch-and-add of 1 hands every leaf a distinct slot (in outoffset[]) within its root's range */
    PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
    PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
    for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
      for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
    }
    PetscCall(PetscMalloc1(sf->nleaves, &remote));
    for (i = 0; i < sf->nleaves; i++) {
      remote[i].rank = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
    sf->multi->multi = sf->multi;
    PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
    if (sf->rankorder) { /* Sort the ranks */
      PetscMPIInt  rank;
      PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
      PetscSFNode *newremote;
      /* Reduce each leaf's owning rank onto its multi-root slot so roots learn who feeds each slot */
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
      for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
      PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
      for (i = 0; i < maxlocal; i++) outranks[i] = rank;
      PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
      PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i = 0; i < sf->nroots; i++) {
        PetscInt j;
        for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
        PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset));
        for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Broadcast the rank-sorted slot numbers back to the leaves and rebuild the graph with them */
      PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
      PetscCall(PetscMalloc1(sf->nleaves, &newremote));
      for (i = 0; i < sf->nleaves; i++) {
        newremote[i].rank = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
      PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
    }
    PetscCall(PetscFree3(inoffset, outones, outoffset));
  }
  *multi = sf->multi;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices

  Collective

  Input Parameters:
+ sf - original star forest
. nselected - number of selected roots on this process
- selected - indices of the selected roots on this process

  Output Parameter:
. esf - new star forest

  Level: advanced

  Note:
  To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
  be done by calling PetscSFGetGraph().

.seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf)
{
  PetscInt           i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
  const PetscInt    *ilocal;
  signed char       *rootdata, *leafdata, *leafmem;
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  MPI_Comm           comm;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  if (nselected) PetscAssertPointer(selected, 3);
  PetscAssertPointer(esf, 4);

  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));

  if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range
indices */ 1302 PetscBool dups; 1303 PetscCall(PetscCheckDupsInt(nselected, selected, &dups)); 1304 PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups"); 1305 for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots); 1306 } 1307 1308 if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf); 1309 else { 1310 /* A generic version of creating embedded sf */ 1311 PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf)); 1312 maxlocal = maxleaf - minleaf + 1; 1313 PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem)); 1314 leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf); 1315 /* Tag selected roots and bcast to leaves */ 1316 for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1; 1317 PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1318 PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1319 1320 /* Build esf with leaves that are still connected */ 1321 esf_nleaves = 0; 1322 for (i = 0; i < nleaves; i++) { 1323 j = ilocal ? ilocal[i] : i; 1324 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1325 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1326 */ 1327 esf_nleaves += (leafdata[j] ? 1 : 0); 1328 } 1329 PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal)); 1330 PetscCall(PetscMalloc1(esf_nleaves, &new_iremote)); 1331 for (i = n = 0; i < nleaves; i++) { 1332 j = ilocal ? 
ilocal[i] : i; 1333 if (leafdata[j]) { 1334 new_ilocal[n] = j; 1335 new_iremote[n].rank = iremote[i].rank; 1336 new_iremote[n].index = iremote[i].index; 1337 ++n; 1338 } 1339 } 1340 PetscCall(PetscSFCreate(comm, esf)); 1341 PetscCall(PetscSFSetFromOptions(*esf)); 1342 PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1343 PetscCall(PetscFree2(rootdata, leafmem)); 1344 } 1345 PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0)); 1346 PetscFunctionReturn(PETSC_SUCCESS); 1347 } 1348 1349 /*@C 1350 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices 1351 1352 Collective 1353 1354 Input Parameters: 1355 + sf - original star forest 1356 . nselected - number of selected leaves on this process 1357 - selected - indices of the selected leaves on this process 1358 1359 Output Parameter: 1360 . newsf - new star forest 1361 1362 Level: advanced 1363 1364 .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1365 @*/ 1366 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf) 1367 { 1368 const PetscSFNode *iremote; 1369 PetscSFNode *new_iremote; 1370 const PetscInt *ilocal; 1371 PetscInt i, nroots, *leaves, *new_ilocal; 1372 MPI_Comm comm; 1373 1374 PetscFunctionBegin; 1375 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1376 PetscSFCheckGraphSet(sf, 1); 1377 if (nselected) PetscAssertPointer(selected, 3); 1378 PetscAssertPointer(newsf, 4); 1379 1380 /* Uniq selected[] and put results in leaves[] */ 1381 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1382 PetscCall(PetscMalloc1(nselected, &leaves)); 1383 PetscCall(PetscArraycpy(leaves, selected, nselected)); 1384 PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves)); 1385 PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, 
PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves); 1386 1387 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1388 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf); 1389 else { 1390 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote)); 1391 PetscCall(PetscMalloc1(nselected, &new_ilocal)); 1392 PetscCall(PetscMalloc1(nselected, &new_iremote)); 1393 for (i = 0; i < nselected; ++i) { 1394 const PetscInt l = leaves[i]; 1395 new_ilocal[i] = ilocal ? ilocal[l] : l; 1396 new_iremote[i].rank = iremote[l].rank; 1397 new_iremote[i].index = iremote[l].index; 1398 } 1399 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf)); 1400 PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1401 } 1402 PetscCall(PetscFree(leaves)); 1403 PetscFunctionReturn(PETSC_SUCCESS); 1404 } 1405 1406 /*@C 1407 PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()` 1408 1409 Collective 1410 1411 Input Parameters: 1412 + sf - star forest on which to communicate 1413 . unit - data type associated with each node 1414 . rootdata - buffer to broadcast 1415 - op - operation to use for reduction 1416 1417 Output Parameter: 1418 . leafdata - buffer to be reduced with values from each leaf's respective root 1419 1420 Level: intermediate 1421 1422 Note: 1423 When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1424 are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should 1425 use `PetscSFBcastWithMemTypeBegin()` instead. 
1426 1427 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()` 1428 @*/ 1429 PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1430 { 1431 PetscMemType rootmtype, leafmtype; 1432 1433 PetscFunctionBegin; 1434 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1435 PetscCall(PetscSFSetUp(sf)); 1436 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1437 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1438 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1439 PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 1440 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1441 PetscFunctionReturn(PETSC_SUCCESS); 1442 } 1443 1444 /*@C 1445 PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call 1446 to `PetscSFBcastEnd()` 1447 1448 Collective 1449 1450 Input Parameters: 1451 + sf - star forest on which to communicate 1452 . unit - data type associated with each node 1453 . rootmtype - memory type of rootdata 1454 . rootdata - buffer to broadcast 1455 . leafmtype - memory type of leafdata 1456 - op - operation to use for reduction 1457 1458 Output Parameter: 1459 . 
leafdata - buffer to be reduced with values from each leaf's respective root

  Level: intermediate

.seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
@*/
PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  /* NOTE(review): vscat.logging presumably indicates an enclosing VecScatter logs its own events, so SF-level logging is skipped -- inferred from field name */
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
  /* Caller-supplied memory types are trusted; no PetscGetMemType() probe as in PetscSFBcastBegin() */
  PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()`

  Collective

  Input Parameters:
+ sf - star forest
. unit - data type
. rootdata - buffer to broadcast
- op - operation to use for reduction

  Output Parameter:
.
leafdata - buffer to be reduced with values from each leaf's respective root

  Level: intermediate

.seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
@*/
PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
  /* Completes the communication started by the matching Begin call */
  PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()`

  Collective

  Input Parameters:
+ sf - star forest
. unit - data type
. leafdata - values to reduce
- op - reduction operation

  Output Parameter:
. rootdata - result of reduction of values from all leaves of each root

  Level: intermediate

  Note:
  When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
  are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should
  use `PetscSFReduceWithMemTypeBegin()` instead.
1524 1525 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()` 1526 @*/ 1527 PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1528 { 1529 PetscMemType rootmtype, leafmtype; 1530 1531 PetscFunctionBegin; 1532 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1533 PetscCall(PetscSFSetUp(sf)); 1534 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1535 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1536 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1537 PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1538 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1539 PetscFunctionReturn(PETSC_SUCCESS); 1540 } 1541 1542 /*@C 1543 PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()` 1544 1545 Collective 1546 1547 Input Parameters: 1548 + sf - star forest 1549 . unit - data type 1550 . leafmtype - memory type of leafdata 1551 . leafdata - values to reduce 1552 . rootmtype - memory type of rootdata 1553 - op - reduction operation 1554 1555 Output Parameter: 1556 . 
rootdata - result of reduction of values from all leaves of each root

  Level: intermediate

.seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()`
@*/
PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
  /* Caller-supplied memory types are trusted; no PetscGetMemType() probe as in PetscSFReduceBegin() */
  PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()`

  Collective

  Input Parameters:
+ sf - star forest
. unit - data type
. leafdata - values to reduce
- op - reduction operation

  Output Parameter:
.
rootdata - result of reduction of values from all leaves of each root

  Level: intermediate

.seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()`
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0));
  PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value,
  to be completed with `PetscSFFetchAndOpEnd()`

  Collective

  Input Parameters:
+ sf - star forest
. unit - data type
. leafdata - leaf values to use in reduction
- op - operation to use for reduction

  Output Parameters:
+ rootdata - root values to be updated, input state is seen by first process to perform an update
- leafupdate - state at each leaf's respective root immediately prior to my atomic update

  Level: advanced

  Note:
  The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
  might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
  not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
  integers.
.seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscMemType rootmtype, leafmtype, leafupdatemtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  PetscCall(PetscGetMemType(rootdata, &rootmtype));
  PetscCall(PetscGetMemType(leafdata, &leafmtype));
  PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
  /* implementations only support a single leaf-side memory space for the read and the update buffers */
  PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
  PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by
  applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()`

  Collective

  Input Parameters:
+ sf - star forest
. unit - data type
. rootmtype - memory type of rootdata
. leafmtype - memory type of leafdata
. leafdata - leaf values to use in reduction
. leafupdatemtype - memory type of leafupdate
- op - operation to use for reduction

  Output Parameters:
+ rootdata - root values to be updated, input state is seen by first process to perform an update
- leafupdate - state at each leaf's respective root immediately prior to my atomic update

  Level: advanced

  Note:
  See `PetscSFFetchAndOpBegin()` for more details.
.seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()`
@*/
PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  /* memory types come from the caller; only the leaf-side consistency is checked here */
  PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
  PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()`
  to fetch values from roots and update atomically by applying operation using my leaf value

  Collective

  Input Parameters:
+ sf - star forest
. unit - data type
.
leafdata - leaf values to use in reduction
- op - operation to use for reduction

  Output Parameters:
+ rootdata - root values to be updated, input state is seen by first process to perform an update
- leafupdate - state at each leaf's respective root immediately prior to my atomic update

  Level: advanced

.seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()`
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
  PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()`

  Collective

  Input Parameter:
. sf - star forest

  Output Parameter:
. degree - degree of each root vertex

  Level: advanced

  Note:
  The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
.seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt **degree)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscAssertPointer(degree, 2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf - sf->minleaf + 1;
    PetscCall(PetscMalloc1(nroots, &sf->degree));
    PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i = 0; i < nroots; i++) sf->degree[i] = 0;
    for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
    /* summing a 1 from every leaf to its root counts the number of leaves attached to each root */
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
  }
  *degree = NULL; /* result only becomes available in PetscSFComputeDegreeEnd() */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`

  Collective

  Input Parameter:
. sf - star forest

  Output Parameter:
. degree - degree of each root vertex

  Level: developer

  Note:
  The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
.seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscAssertPointer(degree, 2);
  if (!sf->degreeknown) {
    PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
    PetscCall(PetscFree(sf->degreetmp));
    sf->degreeknown = PETSC_TRUE; /* cached: later Begin/End pairs just return sf->degree */
  }
  *degree = sf->degree;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`).
  Each multi-root is assigned index of the corresponding original root.

  Collective

  Input Parameters:
+ sf - star forest
- degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()`

  Output Parameters:
+ nMultiRoots - (optional) number of multi-roots (roots of multi-`PetscSF`)
- multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots`

  Level: developer

  Note:
  The returned array `multiRootsOrigNumbering` is newly allocated and should be destroyed with `PetscFree()` when no longer needed.
.seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
@*/
PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
{
  PetscSF  msf;
  PetscInt i, j, k, nroots, nmroots;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  if (nroots) PetscAssertPointer(degree, 2);
  if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3);
  PetscAssertPointer(multiRootsOrigNumbering, 4);
  PetscCall(PetscSFGetMultiSF(sf, &msf));
  PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
  PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
  /* original root i contributes degree[i] consecutive multi-roots, all numbered i */
  for (i = 0, j = 0, k = 0; i < nroots; i++) {
    if (!degree[i]) continue;
    for (j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
  }
  /* the degrees must sum exactly to the multi-SF root count */
  PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail");
  if (nMultiRoots) *nMultiRoots = nmroots;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`

  Collective

  Input Parameters:
+ sf - star forest
. unit - data type
- leafdata - leaf data to gather to roots

  Output Parameter:
.
multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1840 1841 Level: intermediate 1842 1843 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()` 1844 @*/ 1845 PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) 1846 { 1847 PetscSF multi = NULL; 1848 1849 PetscFunctionBegin; 1850 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1851 PetscCall(PetscSFSetUp(sf)); 1852 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1853 PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 1854 PetscFunctionReturn(PETSC_SUCCESS); 1855 } 1856 1857 /*@C 1858 PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()` 1859 1860 Collective 1861 1862 Input Parameters: 1863 + sf - star forest 1864 . unit - data type 1865 - leafdata - leaf data to gather to roots 1866 1867 Output Parameter: 1868 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1869 1870 Level: intermediate 1871 1872 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()` 1873 @*/ 1874 PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) 1875 { 1876 PetscSF multi = NULL; 1877 1878 PetscFunctionBegin; 1879 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1880 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1881 PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 1882 PetscFunctionReturn(PETSC_SUCCESS); 1883 } 1884 1885 /*@C 1886 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()` 1887 1888 Collective 1889 1890 Input Parameters: 1891 + sf - star forest 1892 . unit - data type 1893 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1894 1895 Output Parameter: 1896 . 
leafdata - leaf data to be updated with personal data from each respective root

  Level: intermediate

.seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  /* a scatter is a broadcast over the multi-SF, which carries one unit per (root, leaf) edge */
  PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`

  Collective

  Input Parameters:
+ sf - star forest
. unit - data type
- multirootdata - root buffer to send to each leaf, one unit of data per leaf

  Output Parameter:
. leafdata - leaf data to be updated with personal data from each respective root

  Level: intermediate

.seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Verify, in debug builds only, that the leaves of sf are unique; errors out if any leaf index repeats */
static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
{
  PetscInt        i, n, nleaves;
  const PetscInt *ilocal = NULL;
  PetscHSetI      seen;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) {
    PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
    PetscCall(PetscHSetICreate(&seen));
    for (i = 0; i < nleaves; i++) {
      const PetscInt leaf = ilocal ? ilocal[i] : i; /* ilocal == NULL means contiguous leaves 0..nleaves-1 */
      PetscCall(PetscHSetIAdd(seen, leaf));
    }
    /* if any leaf repeated, the set is smaller than the leaf count */
    PetscCall(PetscHSetIGetSize(seen, &n));
    PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
    PetscCall(PetscHSetIDestroy(&seen));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view

  Input Parameters:
+ sfA - The first `PetscSF`
- sfB - The second `PetscSF`

  Output Parameter:
. sfBA - The composite `PetscSF`

  Level: developer

  Notes:
  Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots.

  `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds
  a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected
  nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a
  `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfB`, on connected nodes.
.seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA, *remotePointsB;
  PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
  const PetscInt    *localPointsA, *localPointsB;
  PetscInt          *localPointsBA;
  PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
  PetscBool          denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sfA, 1);
  PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
  PetscSFCheckGraphSet(sfB, 2);
  PetscCheckSameComm(sfA, 1, sfB, 2);
  PetscAssertPointer(sfBA, 3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
  /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size
     numRootsB; otherwise, garbage will be broadcasted.
     Example (comm size = 1):
     sfA: 0 <- (0, 0)
     sfB: 100 <- (0, 0)
          101 <- (0, 1)
     Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target
     of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would
     receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on
     remotePointsA; if not recast, point 101 would receive a garbage value. */
  PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
  for (i = 0; i < numRootsB; i++) {
    reorderedRemotePointsA[i].rank  = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  /* scatter sfA's remote roots to the slots indexed by sfA's leaves, i.e. by sfB's roots */
  for (i = 0; i < numLeavesA; i++) {
    PetscInt localp = localPointsA ? localPointsA[i] : i;

    if (localp >= numRootsB) continue; /* this leaf of sfA is not a root of sfB, so it is on no composed path */
    reorderedRemotePointsA[localp] = remotePointsA[i];
  }
  remotePointsA = reorderedRemotePointsA;
  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
  /* (-1, -1) marks leaves of sfB that receive no root of sfA */
  for (i = 0; i < maxleaf - minleaf + 1; i++) {
    leafdataB[i].rank  = -1;
    leafdataB[i].index = -1;
  }
  PetscCall(PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
  PetscCall(PetscFree(reorderedRemotePointsA));

  /* denseB: every leaf of sfB is connected, so the composed SF can keep contiguous (NULL) ilocal */
  denseB = (PetscBool)!localPointsB;
  for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB;
  } else {
    PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
    PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
    for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l - minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
      localPointsBA[numLeavesBA]  = l;
      numLeavesBA++;
    }
    PetscCall(PetscFree(leafdataB));
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one

  Input Parameters:
+ sfA - The first `PetscSF`
- sfB - The second `PetscSF`

  Output Parameter:
. sfBA - The composite `PetscSF`.

  Level: developer

  Notes:
  Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
  second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected.

  `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds
  a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected
  roots are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()`
  on `sfA`, then
  a `PetscSFReduceBegin()`/`PetscSFReduceEnd()` on `sfB`, on connected roots.
.seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA, *remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA, *localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
  MPI_Op             op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sfA, 1);
  PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
  PetscSFCheckGraphSet(sfB, 2);
  PetscCheckSameComm(sfA, 1, sfB, 2);
  PetscAssertPointer(sfBA, 3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
  if (iswin) op = MPI_REPLACE;
#endif

  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
  /* (-1, -1) marks positions in sfB's leaf range that carry no root of sfA */
  for (i = 0; i < maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank  = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  /* place sfA's remote roots at the positions of sfA's leaves within sfB's leaf range */
  if (localPointsA) {
    for (i = 0; i < numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    for (i = 0; i < numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
  PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
  for (i = 0; i < numRootsB; i++) {
    remotePointsBA[i].rank  = -1;
    remotePointsBA[i].index = -1;
  }

  /* reduce sfA's root info from sfB's leaves onto sfB's roots; roots that receive nothing keep (-1, -1) */
  PetscCall(PetscSFReduceBegin(sfB, MPIU_2INT, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
  PetscCall(PetscSFReduceEnd(sfB, MPIU_2INT, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
  PetscCall(PetscFree(reorderedRemotePointsA));
  /* compact the connected roots to the front, recording their original indices as the new local leaves */
  for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA]        = i;
    numLeavesBA++;
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`

  Input Parameter:
. sf - The global `PetscSF`

  Output Parameter:
. out - The local `PetscSF`

.seealso: `PetscSF`, `PetscSFCreate()`
*/
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
{
  MPI_Comm           comm;
  PetscMPIInt        myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
  PetscSFNode       *liremote;
  PetscSF            lsf;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
  else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
    PetscCallMPI(MPI_Comm_rank(comm, &myrank));

    /* Find out local edges and build a local SF */
    PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
    /* first pass: count edges whose root lives on this rank */
    for (i = lnleaves = 0; i < nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) lnleaves++;
    }
    PetscCall(PetscMalloc1(lnleaves, &lilocal));
    PetscCall(PetscMalloc1(lnleaves, &liremote));

    /* second pass: copy the intra-process edges */
    for (i = j = 0; i < nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
    PetscCall(PetscSFSetFromOptions(lsf));
    PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(lsf));
    *out = lsf;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
{
  PetscMemType rootmtype, leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscCall(PetscGetMemType(rootdata, &rootmtype));
  PetscCall(PetscGetMemType(leafdata, &leafmtype));
  PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
  PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFConcatenate - concatenate multiple `PetscSF` into one

  Input Parameters:
+ comm - the communicator
. nsfs - the number of input `PetscSF`
. sfs - the array of input `PetscSF`
. rootMode - the root mode specifying how roots are handled
- leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage

  Output Parameter:
. newsf - The resulting `PetscSF`

  Level: advanced

  Notes:
  The communicator of all `PetscSF`s in `sfs` must be comm.

  Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order.
2259 2260 The offsets in `leafOffsets` are added to the original leaf indices. 2261 2262 If all input SFs use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well. 2263 In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`. 2264 2265 If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s. 2266 In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs). 2267 2268 All root modes retain the essential connectivity condition. 2269 If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`. 2270 Parameter `rootMode` controls how the input root spaces are combined. 2271 For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode) 2272 and is also the same in the output `PetscSF`. 2273 For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined. 2274 `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally; 2275 roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input `PetscSF` and original local index, and renumbered contiguously. 2276 `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally; 2277 roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously; 2278 the original root ranks are ignored. 2279 For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, 2280 the output `PetscSF`'s root layout is such that the local number of roots is a sum of the input `PetscSF`'s local numbers of roots on each rank 2281 to keep the load balancing. 
2282 However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different ranks. 2283 2284 Example: 2285 We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running 2286 .vb 2287 make -C $PETSC_DIR/src/vec/is/sf/tests ex18 2288 for m in {local,global,shared}; do 2289 mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view 2290 done 2291 .ve 2292 we generate two identical `PetscSF`s sf_0 and sf_1, 2293 .vb 2294 PetscSF Object: sf_0 2 MPI processes 2295 type: basic 2296 rank #leaves #roots 2297 [ 0] 4 2 2298 [ 1] 4 2 2299 leaves roots roots in global numbering 2300 ( 0, 0) <- ( 0, 0) = 0 2301 ( 0, 1) <- ( 0, 1) = 1 2302 ( 0, 2) <- ( 1, 0) = 2 2303 ( 0, 3) <- ( 1, 1) = 3 2304 ( 1, 0) <- ( 0, 0) = 0 2305 ( 1, 1) <- ( 0, 1) = 1 2306 ( 1, 2) <- ( 1, 0) = 2 2307 ( 1, 3) <- ( 1, 1) = 3 2308 .ve 2309 and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\: 2310 .vb 2311 rootMode = local: 2312 PetscSF Object: result_sf 2 MPI processes 2313 type: basic 2314 rank #leaves #roots 2315 [ 0] 8 4 2316 [ 1] 8 4 2317 leaves roots roots in global numbering 2318 ( 0, 0) <- ( 0, 0) = 0 2319 ( 0, 1) <- ( 0, 1) = 1 2320 ( 0, 2) <- ( 1, 0) = 4 2321 ( 0, 3) <- ( 1, 1) = 5 2322 ( 0, 4) <- ( 0, 2) = 2 2323 ( 0, 5) <- ( 0, 3) = 3 2324 ( 0, 6) <- ( 1, 2) = 6 2325 ( 0, 7) <- ( 1, 3) = 7 2326 ( 1, 0) <- ( 0, 0) = 0 2327 ( 1, 1) <- ( 0, 1) = 1 2328 ( 1, 2) <- ( 1, 0) = 4 2329 ( 1, 3) <- ( 1, 1) = 5 2330 ( 1, 4) <- ( 0, 2) = 2 2331 ( 1, 5) <- ( 0, 3) = 3 2332 ( 1, 6) <- ( 1, 2) = 6 2333 ( 1, 7) <- ( 1, 3) = 7 2334 2335 rootMode = global: 2336 PetscSF Object: result_sf 2 MPI processes 2337 type: basic 2338 rank #leaves #roots 2339 [ 0] 8 4 2340 [ 1] 8 4 2341 leaves roots roots in global numbering 2342 ( 0, 0) <- ( 0, 0) = 0 2343 ( 0, 1) <- ( 0, 1) = 1 2344 ( 0, 2) <- ( 0, 2) = 2 2345 ( 0, 3) <- ( 0, 3) = 3 2346 ( 0, 4) <- ( 1, 0) = 4 2347 ( 0, 5) <- ( 1, 1) = 5 2348 ( 0, 6) <- ( 1, 2) = 6 
2349 ( 0, 7) <- ( 1, 3) = 7 2350 ( 1, 0) <- ( 0, 0) = 0 2351 ( 1, 1) <- ( 0, 1) = 1 2352 ( 1, 2) <- ( 0, 2) = 2 2353 ( 1, 3) <- ( 0, 3) = 3 2354 ( 1, 4) <- ( 1, 0) = 4 2355 ( 1, 5) <- ( 1, 1) = 5 2356 ( 1, 6) <- ( 1, 2) = 6 2357 ( 1, 7) <- ( 1, 3) = 7 2358 2359 rootMode = shared: 2360 PetscSF Object: result_sf 2 MPI processes 2361 type: basic 2362 rank #leaves #roots 2363 [ 0] 8 2 2364 [ 1] 8 2 2365 leaves roots roots in global numbering 2366 ( 0, 0) <- ( 0, 0) = 0 2367 ( 0, 1) <- ( 0, 1) = 1 2368 ( 0, 2) <- ( 1, 0) = 2 2369 ( 0, 3) <- ( 1, 1) = 3 2370 ( 0, 4) <- ( 0, 0) = 0 2371 ( 0, 5) <- ( 0, 1) = 1 2372 ( 0, 6) <- ( 1, 0) = 2 2373 ( 0, 7) <- ( 1, 1) = 3 2374 ( 1, 0) <- ( 0, 0) = 0 2375 ( 1, 1) <- ( 0, 1) = 1 2376 ( 1, 2) <- ( 1, 0) = 2 2377 ( 1, 3) <- ( 1, 1) = 3 2378 ( 1, 4) <- ( 0, 0) = 0 2379 ( 1, 5) <- ( 0, 1) = 1 2380 ( 1, 6) <- ( 1, 0) = 2 2381 ( 1, 7) <- ( 1, 1) = 3 2382 .ve 2383 2384 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode` 2385 @*/ 2386 PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf) 2387 { 2388 PetscInt i, s, nLeaves, nRoots; 2389 PetscInt *leafArrayOffsets; 2390 PetscInt *ilocal_new; 2391 PetscSFNode *iremote_new; 2392 PetscBool all_ilocal_null = PETSC_FALSE; 2393 PetscLayout glayout = NULL; 2394 PetscInt *gremote = NULL; 2395 PetscMPIInt rank, size; 2396 2397 PetscFunctionBegin; 2398 if (PetscDefined(USE_DEBUG)) { 2399 PetscSF dummy; /* just to have a PetscObject on comm for input validation */ 2400 2401 PetscCall(PetscSFCreate(comm, &dummy)); 2402 PetscValidLogicalCollectiveInt(dummy, nsfs, 2); 2403 PetscAssertPointer(sfs, 3); 2404 for (i = 0; i < nsfs; i++) { 2405 PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3); 2406 PetscCheckSameComm(dummy, 1, sfs[i], 3); 2407 } 2408 PetscValidLogicalCollectiveEnum(dummy, rootMode, 4); 2409 if (leafOffsets) 
PetscAssertPointer(leafOffsets, 5); 2410 PetscAssertPointer(newsf, 6); 2411 PetscCall(PetscSFDestroy(&dummy)); 2412 } 2413 if (!nsfs) { 2414 PetscCall(PetscSFCreate(comm, newsf)); 2415 PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 2416 PetscFunctionReturn(PETSC_SUCCESS); 2417 } 2418 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2419 PetscCallMPI(MPI_Comm_size(comm, &size)); 2420 2421 /* Calculate leaf array offsets */ 2422 PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets)); 2423 leafArrayOffsets[0] = 0; 2424 for (s = 0; s < nsfs; s++) { 2425 PetscInt nl; 2426 2427 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL)); 2428 leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl; 2429 } 2430 nLeaves = leafArrayOffsets[nsfs]; 2431 2432 /* Calculate number of roots */ 2433 switch (rootMode) { 2434 case PETSCSF_CONCATENATE_ROOTMODE_SHARED: { 2435 PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL)); 2436 if (PetscDefined(USE_DEBUG)) { 2437 for (s = 1; s < nsfs; s++) { 2438 PetscInt nr; 2439 2440 PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL)); 2441 PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots); 2442 } 2443 } 2444 } break; 2445 case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: { 2446 /* Calculate also global layout in this case */ 2447 PetscInt *nls; 2448 PetscLayout *lts; 2449 PetscInt **inds; 2450 PetscInt j; 2451 PetscInt rootOffset = 0; 2452 2453 PetscCall(PetscCalloc3(nsfs, <s, nsfs, &nls, nsfs, &inds)); 2454 PetscCall(PetscLayoutCreate(comm, &glayout)); 2455 glayout->bs = 1; 2456 glayout->n = 0; 2457 glayout->N = 0; 2458 for (s = 0; s < nsfs; s++) { 2459 PetscCall(PetscSFGetGraphLayout(sfs[s], <s[s], &nls[s], NULL, &inds[s])); 2460 glayout->n += lts[s]->n; 2461 glayout->N += lts[s]->N; 2462 } 2463 
PetscCall(PetscLayoutSetUp(glayout)); 2464 PetscCall(PetscMalloc1(nLeaves, &gremote)); 2465 for (s = 0, j = 0; s < nsfs; s++) { 2466 for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset; 2467 rootOffset += lts[s]->N; 2468 PetscCall(PetscLayoutDestroy(<s[s])); 2469 PetscCall(PetscFree(inds[s])); 2470 } 2471 PetscCall(PetscFree3(lts, nls, inds)); 2472 nRoots = glayout->N; 2473 } break; 2474 case PETSCSF_CONCATENATE_ROOTMODE_LOCAL: 2475 /* nRoots calculated later in this case */ 2476 break; 2477 default: 2478 SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode); 2479 } 2480 2481 if (!leafOffsets) { 2482 all_ilocal_null = PETSC_TRUE; 2483 for (s = 0; s < nsfs; s++) { 2484 const PetscInt *ilocal; 2485 2486 PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL)); 2487 if (ilocal) { 2488 all_ilocal_null = PETSC_FALSE; 2489 break; 2490 } 2491 } 2492 PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL"); 2493 } 2494 2495 /* Renumber and concatenate local leaves */ 2496 ilocal_new = NULL; 2497 if (!all_ilocal_null) { 2498 PetscCall(PetscMalloc1(nLeaves, &ilocal_new)); 2499 for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1; 2500 for (s = 0; s < nsfs; s++) { 2501 const PetscInt *ilocal; 2502 PetscInt *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]); 2503 PetscInt i, nleaves_l; 2504 2505 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL)); 2506 for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? 
ilocal[i] : i) + leafOffsets[s]; 2507 } 2508 } 2509 2510 /* Renumber and concatenate remote roots */ 2511 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) { 2512 PetscInt rootOffset = 0; 2513 2514 PetscCall(PetscMalloc1(nLeaves, &iremote_new)); 2515 for (i = 0; i < nLeaves; i++) { 2516 iremote_new[i].rank = -1; 2517 iremote_new[i].index = -1; 2518 } 2519 for (s = 0; s < nsfs; s++) { 2520 PetscInt i, nl, nr; 2521 PetscSF tmp_sf; 2522 const PetscSFNode *iremote; 2523 PetscSFNode *tmp_rootdata; 2524 PetscSFNode *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]); 2525 2526 PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote)); 2527 PetscCall(PetscSFCreate(comm, &tmp_sf)); 2528 /* create helper SF with contiguous leaves */ 2529 PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 2530 PetscCall(PetscSFSetUp(tmp_sf)); 2531 PetscCall(PetscMalloc1(nr, &tmp_rootdata)); 2532 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) { 2533 for (i = 0; i < nr; i++) { 2534 tmp_rootdata[i].index = i + rootOffset; 2535 tmp_rootdata[i].rank = (PetscInt)rank; 2536 } 2537 rootOffset += nr; 2538 } else { 2539 for (i = 0; i < nr; i++) { 2540 tmp_rootdata[i].index = i; 2541 tmp_rootdata[i].rank = (PetscInt)rank; 2542 } 2543 } 2544 PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2545 PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2546 PetscCall(PetscSFDestroy(&tmp_sf)); 2547 PetscCall(PetscFree(tmp_rootdata)); 2548 } 2549 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above 2550 2551 /* Build the new SF */ 2552 PetscCall(PetscSFCreate(comm, newsf)); 2553 PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER)); 2554 } else { 2555 /* Build the new SF */ 
2556 PetscCall(PetscSFCreate(comm, newsf)); 2557 PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote)); 2558 } 2559 PetscCall(PetscSFSetUp(*newsf)); 2560 PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view")); 2561 PetscCall(PetscLayoutDestroy(&glayout)); 2562 PetscCall(PetscFree(gremote)); 2563 PetscCall(PetscFree(leafArrayOffsets)); 2564 PetscFunctionReturn(PETSC_SUCCESS); 2565 } 2566