1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/hashseti.h> 3 #include <petsc/private/viewerimpl.h> 4 #include <petsc/private/hashmapi.h> 5 6 #if defined(PETSC_HAVE_CUDA) 7 #include <cuda_runtime.h> 8 #include <petscdevice_cuda.h> 9 #endif 10 11 #if defined(PETSC_HAVE_HIP) 12 #include <hip/hip_runtime.h> 13 #endif 14 15 #if defined(PETSC_CLANG_STATIC_ANALYZER) 16 extern void PetscSFCheckGraphSet(PetscSF, int); 17 #else 18 #if defined(PETSC_USE_DEBUG) 19 #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME) 20 #else 21 #define PetscSFCheckGraphSet(sf, arg) \ 22 do { \ 23 } while (0) 24 #endif 25 #endif 26 27 const char *const PetscSFDuplicateOptions[] = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL}; 28 const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL}; 29 30 /*@ 31 PetscSFCreate - create a star forest communication context 32 33 Collective 34 35 Input Parameter: 36 . comm - communicator on which the star forest will operate 37 38 Output Parameter: 39 . sf - new star forest context 40 41 Options Database Keys: 42 + -sf_type basic - Use MPI persistent Isend/Irecv for communication (Default) 43 . -sf_type window - Use MPI-3 one-sided window for communication 44 . -sf_type neighbor - Use MPI-3 neighborhood collectives for communication 45 - -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor) 46 47 Level: intermediate 48 49 Note: 50 When one knows the communication graph matches one of the predefined patterns, such as that of `MPI_Alltoall()`, `MPI_Allgatherv()`, 51 or `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special 52 `SF`s are optimized and have better performance than general `SF`s.
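   Example Usage:
   A minimal sketch of creating a `PetscSF` and giving it one of the predefined patterns; the communicator and the local size of 2 are illustrative only:
.vb
   PetscSF     sf;
   PetscLayout map;

   PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
   PetscCall(PetscLayoutCreate(PETSC_COMM_WORLD, &map));
   PetscCall(PetscLayoutSetLocalSize(map, 2));
   PetscCall(PetscLayoutSetUp(map));
   PetscCall(PetscSFSetGraphWithPattern(sf, map, PETSCSF_PATTERN_ALLGATHER)); // roughly the pattern of MPI_Allgatherv()
   PetscCall(PetscLayoutDestroy(&map));
   // ... communicate, e.g. with PetscSFBcastBegin()/PetscSFBcastEnd() ...
   PetscCall(PetscSFDestroy(&sf));
.ve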
53 54 .seealso: `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()` 55 @*/ 56 PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf) 57 { 58 PetscSF b; 59 60 PetscFunctionBegin; 61 PetscAssertPointer(sf, 2); 62 PetscCall(PetscSFInitializePackage()); 63 64 PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView)); 65 b->nroots = -1; 66 b->nleaves = -1; 67 b->minleaf = PETSC_MAX_INT; 68 b->maxleaf = PETSC_MIN_INT; 69 b->nranks = -1; 70 b->rankorder = PETSC_TRUE; 71 b->ingroup = MPI_GROUP_NULL; 72 b->outgroup = MPI_GROUP_NULL; 73 b->graphset = PETSC_FALSE; 74 #if defined(PETSC_HAVE_DEVICE) 75 b->use_gpu_aware_mpi = use_gpu_aware_mpi; 76 b->use_stream_aware_mpi = PETSC_FALSE; 77 b->unknown_input_stream = PETSC_FALSE; 78 #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 79 b->backend = PETSCSF_BACKEND_KOKKOS; 80 #elif defined(PETSC_HAVE_CUDA) 81 b->backend = PETSCSF_BACKEND_CUDA; 82 #elif defined(PETSC_HAVE_HIP) 83 b->backend = PETSCSF_BACKEND_HIP; 84 #endif 85 86 #if defined(PETSC_HAVE_NVSHMEM) 87 b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */ 88 b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */ 89 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL)); 90 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL)); 91 #endif 92 #endif 93 b->vscat.from_n = -1; 94 b->vscat.to_n = -1; 95 b->vscat.unit = MPIU_SCALAR; 96 *sf = b; 97 PetscFunctionReturn(PETSC_SUCCESS); 98 } 99 100 /*@ 101 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 102 103 Collective 104 105 Input Parameter: 106 . sf - star forest 107 108 Level: advanced 109 110 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()` 111 @*/ 112 PetscErrorCode PetscSFReset(PetscSF sf) 113 { 114 PetscFunctionBegin; 115 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 116 PetscTryTypeMethod(sf, Reset); 117 PetscCall(PetscSFDestroy(&sf->rankssf)); 118 119 sf->nroots = -1; 120 sf->nleaves = -1; 121 sf->minleaf = PETSC_MAX_INT; 122 sf->maxleaf = PETSC_MIN_INT; 123 sf->mine = NULL; 124 sf->remote = NULL; 125 sf->graphset = PETSC_FALSE; 126 PetscCall(PetscFree(sf->mine_alloc)); 127 PetscCall(PetscFree(sf->remote_alloc)); 128 sf->nranks = -1; 129 PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote)); 130 sf->degreeknown = PETSC_FALSE; 131 PetscCall(PetscFree(sf->degree)); 132 if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup)); 133 if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup)); 134 135 if (sf->multi) sf->multi->multi = NULL; 136 PetscCall(PetscSFDestroy(&sf->multi)); 137 138 PetscCall(PetscLayoutDestroy(&sf->map)); 139 140 #if defined(PETSC_HAVE_DEVICE) 141 for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i])); 142 #endif 143 144 sf->setupcalled = PETSC_FALSE; 145 PetscFunctionReturn(PETSC_SUCCESS); 146 } 147 148 /*@ 149 PetscSFSetType - Set the `PetscSF` communication implementation 150 151 Collective 152 153 Input Parameters: 154 + sf - the `PetscSF` context 155 - type - a known method 156 .vb 157 PETSCSFWINDOW - MPI-2/3 one-sided 158 PETSCSFBASIC - basic implementation using MPI-1 two-sided 159 .ve 160 161 Options Database Key: 162 . 
-sf_type <type> - Sets the method; for example `basic` or `window` use -help for a list of available methods 163 164 Level: intermediate 165 166 Notes: 167 See `PetscSFType` for possible values 168 169 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()` 170 @*/ 171 PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type) 172 { 173 PetscBool match; 174 PetscErrorCode (*r)(PetscSF); 175 176 PetscFunctionBegin; 177 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 178 PetscAssertPointer(type, 2); 179 180 PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match)); 181 if (match) PetscFunctionReturn(PETSC_SUCCESS); 182 183 PetscCall(PetscFunctionListFind(PetscSFList, type, &r)); 184 PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type); 185 /* Destroy the previous PetscSF implementation context */ 186 PetscTryTypeMethod(sf, Destroy); 187 PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops))); 188 PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type)); 189 PetscCall((*r)(sf)); 190 PetscFunctionReturn(PETSC_SUCCESS); 191 } 192 193 /*@ 194 PetscSFGetType - Get the `PetscSF` communication implementation 195 196 Not Collective 197 198 Input Parameter: 199 . sf - the `PetscSF` context 200 201 Output Parameter: 202 . type - the `PetscSF` type name 203 204 Level: intermediate 205 206 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()` 207 @*/ 208 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 209 { 210 PetscFunctionBegin; 211 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 212 PetscAssertPointer(type, 2); 213 *type = ((PetscObject)sf)->type_name; 214 PetscFunctionReturn(PETSC_SUCCESS); 215 } 216 217 /*@ 218 PetscSFDestroy - destroy a star forest 219 220 Collective 221 222 Input Parameter: 223 . sf - address of star forest 224 225 Level: intermediate 226 227 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()` 228 @*/ 229 PetscErrorCode PetscSFDestroy(PetscSF *sf) 230 { 231 PetscFunctionBegin; 232 if (!*sf) PetscFunctionReturn(PETSC_SUCCESS); 233 PetscValidHeaderSpecific(*sf, PETSCSF_CLASSID, 1); 234 if (--((PetscObject)*sf)->refct > 0) { 235 *sf = NULL; 236 PetscFunctionReturn(PETSC_SUCCESS); 237 } 238 PetscCall(PetscSFReset(*sf)); 239 PetscTryTypeMethod(*sf, Destroy); 240 PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf)); 241 if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit)); 242 #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM) 243 if ((*sf)->use_stream_aware_mpi) { 244 PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream)); 245 PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm)); 246 } 247 #endif 248 PetscCall(PetscHeaderDestroy(sf)); 249 PetscFunctionReturn(PETSC_SUCCESS); 250 } 251 252 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 253 { 254 PetscInt i, nleaves; 255 PetscMPIInt size; 256 const PetscInt *ilocal; 257 const PetscSFNode *iremote; 258 259 PetscFunctionBegin; 260 if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS); 261 PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote)); 262 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 263 for (i = 0; i < nleaves; i++) { 264 const PetscInt rank = iremote[i].rank; 265 const PetscInt remote = iremote[i].index; 266 const PetscInt leaf = ilocal ? 
ilocal[i] : i; 267 PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size); 268 PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i); 269 PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i); 270 } 271 PetscFunctionReturn(PETSC_SUCCESS); 272 } 273 274 /*@ 275 PetscSFSetUp - set up communication structures for a `PetscSF`; after this is done it may be used to perform communication 276 277 Collective 278 279 Input Parameter: 280 . sf - star forest communication object 281 282 Level: beginner 283 284 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()` 285 @*/ 286 PetscErrorCode PetscSFSetUp(PetscSF sf) 287 { 288 PetscFunctionBegin; 289 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 290 PetscSFCheckGraphSet(sf, 1); 291 if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS); 292 PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0)); 293 PetscCall(PetscSFCheckGraphValid_Private(sf)); 294 if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */ 295 PetscTryTypeMethod(sf, SetUp); 296 #if defined(PETSC_HAVE_CUDA) 297 if (sf->backend == PETSCSF_BACKEND_CUDA) { 298 sf->ops->Malloc = PetscSFMalloc_CUDA; 299 sf->ops->Free = PetscSFFree_CUDA; 300 } 301 #endif 302 #if defined(PETSC_HAVE_HIP) 303 if (sf->backend == PETSCSF_BACKEND_HIP) { 304 sf->ops->Malloc = PetscSFMalloc_HIP; 305 sf->ops->Free = PetscSFFree_HIP; 306 } 307 #endif 308 309 #if defined(PETSC_HAVE_KOKKOS) 310 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 311 sf->ops->Malloc = PetscSFMalloc_Kokkos; 312 sf->ops->Free = PetscSFFree_Kokkos; 313 } 314 #endif 315 PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0)); 316 sf->setupcalled = PETSC_TRUE; 317 PetscFunctionReturn(PETSC_SUCCESS); 318 } 319 320 /*@ 321 PetscSFSetFromOptions - set `PetscSF` options using the options database 322 323 Logically Collective 324 325 Input Parameter: 326 . sf - star forest 327 328 Options Database Keys: 329 + -sf_type - implementation type, see `PetscSFSetType()` 330 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 331 . -sf_use_default_stream - Assume callers of `PetscSF` computed the input root/leafdata with the default CUDA stream. `PetscSF` will also 332 use the default stream to process data. Therefore, no stream synchronization is needed between `PetscSF` and its caller (default: true). 333 If true, this option only works with `-use_gpu_aware_mpi 1`. 334 . -sf_use_stream_aware_mpi - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false). 335 If true, this option only works with `-use_gpu_aware_mpi 1`. 336 337 - -sf_backend cuda | hip | kokkos - Select the device backend SF uses. Currently `PetscSF` has these backends: cuda, hip, and kokkos. 338 On CUDA (HIP) devices, one can choose cuda (hip) or kokkos, with the default being kokkos. On other devices, 339 the only available backend is kokkos.
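   Example Usage:
   A minimal sketch of where this call sits in the usual sequence; the graph setup is elided and the option shown in the comment is only an example:
.vb
   PetscSF sf;

   PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
   PetscCall(PetscSFSetFromOptions(sf)); // honors e.g. -sf_type neighbor from the options database
   // ... describe the graph with PetscSFSetGraph() or PetscSFSetGraphWithPattern() ...
   PetscCall(PetscSFSetUp(sf));
   PetscCall(PetscSFDestroy(&sf));
.ve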
340 341 Level: intermediate 342 343 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()` 344 @*/ 345 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 346 { 347 PetscSFType deft; 348 char type[256]; 349 PetscBool flg; 350 351 PetscFunctionBegin; 352 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 353 PetscObjectOptionsBegin((PetscObject)sf); 354 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 355 PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg)); 356 PetscCall(PetscSFSetType(sf, flg ? type : deft)); 357 PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL)); 358 #if defined(PETSC_HAVE_DEVICE) 359 { 360 char backendstr[32] = {0}; 361 PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set; 362 /* Change the defaults set in PetscSFCreate() with command line options */ 363 PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL)); 364 PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL)); 365 PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set)); 366 PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda)); 367 PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos)); 368 PetscCall(PetscStrcasecmp("hip", backendstr, &isHip)); 369 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 370 if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 371 else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 372 else if (isHip) sf->backend = PETSCSF_BACKEND_HIP; 373 else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr); 374 #elif defined(PETSC_HAVE_KOKKOS) 375 PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. 
You can only choose kokkos", backendstr); 376 #endif 377 378 #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM) 379 if (sf->use_stream_aware_mpi) { 380 MPI_Info info; 381 382 PetscCallMPI(MPI_Info_create(&info)); 383 PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t")); 384 PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream))); 385 PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream)); 386 PetscCallMPI(MPI_Info_free(&info)); 387 PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm)); 388 } 389 #endif 390 } 391 #endif 392 PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject); 393 PetscOptionsEnd(); 394 PetscFunctionReturn(PETSC_SUCCESS); 395 } 396 397 /*@ 398 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 399 400 Logically Collective 401 402 Input Parameters: 403 + sf - star forest 404 - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic) 405 406 Level: advanced 407 408 .seealso: `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()` 409 @*/ 410 PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg) 411 { 412 PetscFunctionBegin; 413 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 414 PetscValidLogicalCollectiveBool(sf, flg, 2); 415 PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 416 sf->rankorder = flg; 417 PetscFunctionReturn(PETSC_SUCCESS); 418 } 419 420 /*@ 421 PetscSFSetGraph - Set a parallel star forest 422 423 Collective 424 425 Input Parameters: 426 + sf - star forest 427 . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 428 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 429 . ilocal - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage (locations must be >= 0, enforced 430 during setup in debug mode) 431 . localmode - copy mode for `ilocal` 432 . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 433 during setup in debug mode) 434 - remotemode - copy mode for `iremote` 435 436 Level: intermediate 437 438 Notes: 439 Leaf indices in `ilocal` must be unique, otherwise an error occurs. 440 441 Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics. 442 In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`, 443 PETSc might modify the respective array; 444 if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`. 445 Only if `PETSC_COPY_VALUES` is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed). 446 447 Fortran Notes: 448 In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`. 449 450 Developer Notes: 451 We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf. 452 This also allows to compare leaf sets of two `PetscSF`s easily. 
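   Example Usage:
   A minimal sketch in which each process owns one root and has one leaf referencing root 0 on the next process; the communicator, sizes, and indices are illustrative only:
.vb
   PetscSF     sf;
   PetscSFNode remote;
   PetscMPIInt rank, size;
   PetscInt    rootdata, leafdata = -1;

   PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
   remote.rank  = (rank + 1) % size; // process owning the referenced root
   remote.index = 0;                 // root 0 on that process
   PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
   PetscCall(PetscSFSetGraph(sf, 1, 1, NULL, PETSC_COPY_VALUES, &remote, PETSC_COPY_VALUES));
   rootdata = (PetscInt)rank; // value each root broadcasts to its leaves
   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, &rootdata, &leafdata, MPI_REPLACE));
   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, &rootdata, &leafdata, MPI_REPLACE));
   PetscCall(PetscSFDestroy(&sf));
.ve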
453 454 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()` 455 @*/ 456 PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode) 457 { 458 PetscBool unique, contiguous; 459 460 PetscFunctionBegin; 461 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 462 if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4); 463 if (nleaves > 0) PetscAssertPointer(iremote, 6); 464 PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots); 465 PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves); 466 /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast 467 * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */ 468 PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode); 469 PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode); 470 471 if (sf->nroots >= 0) { /* Reset only if graph already set */ 472 PetscCall(PetscSFReset(sf)); 473 } 474 475 PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0)); 476 477 sf->nroots = nroots; 478 sf->nleaves = nleaves; 479 480 if (localmode == PETSC_COPY_VALUES && ilocal) { 481 PetscInt *tlocal = NULL; 482 483 PetscCall(PetscMalloc1(nleaves, &tlocal)); 484 PetscCall(PetscArraycpy(tlocal, ilocal, nleaves)); 485 ilocal = tlocal; 486 } 487 if (remotemode == PETSC_COPY_VALUES) { 488 PetscSFNode *tremote = NULL; 489 490 PetscCall(PetscMalloc1(nleaves, &tremote)); 491 PetscCall(PetscArraycpy(tremote, iremote, nleaves)); 492 iremote = tremote; 493 } 494 495 if (nleaves && ilocal) { 496 PetscSFNode work; 497 498 PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work)); 499 PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique)); 500 unique = PetscNot(unique); 501 PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF"); 502 sf->minleaf = ilocal[0]; 503 sf->maxleaf = ilocal[nleaves - 1]; 504 contiguous = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1); 505 } else { 506 sf->minleaf = 0; 507 sf->maxleaf = nleaves - 1; 508 unique = PETSC_TRUE; 509 contiguous = PETSC_TRUE; 510 } 511 512 if (contiguous) { 513 if (localmode == PETSC_USE_POINTER) { 514 ilocal = NULL; 515 } else { 516 PetscCall(PetscFree(ilocal)); 517 } 518 } 519 sf->mine = ilocal; 520 if (localmode == PETSC_USE_POINTER) { 521 sf->mine_alloc = NULL; 522 } else { 523 sf->mine_alloc = ilocal; 524 } 525 sf->remote = iremote; 526 if (remotemode == PETSC_USE_POINTER) { 527 sf->remote_alloc = NULL; 528 } else { 529 sf->remote_alloc = iremote; 530 } 531 PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0)); 532 sf->graphset = PETSC_TRUE; 533 PetscFunctionReturn(PETSC_SUCCESS); 534 } 535 536 /*@ 537 PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern 538 539 Collective 540 541 Input Parameters: 542 + sf - The `PetscSF` 543 . 
map - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`) 544 - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL` 545 546 Level: intermediate 547 548 Notes: 549 It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `x` and its `PetscLayout` is `map`. 550 `n` and `N` are the local and global sizes of `x` respectively. 551 552 With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph such that `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it copies `x` to 553 sequential vectors `y` on all MPI processes. 554 555 With `PETSCSF_PATTERN_GATHER`, the routine creates a graph such that `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it copies `x` to a 556 sequential vector `y` on rank 0. 557 558 In the above cases, entries of `x` are roots and entries of `y` are leaves. 559 560 With `PETSCSF_PATTERN_ALLTOALL`, `map` is insignificant. Suppose NP is the size of `sf`'s communicator. The routine 561 creates a graph in which every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 562 of rank j, where 0 <= i, j < NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that this does 563 not mean one cannot send multiple items. One just needs to create a new MPI datatype for the multiple data 564 items with `MPI_Type_contiguous()` and use that as the <unit> argument in SF routines. 565 566 In this case, roots and leaves are symmetric. 567 568 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()` 569 @*/ 570 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern) 571 { 572 MPI_Comm comm; 573 PetscInt n, N, res[2]; 574 PetscMPIInt rank, size; 575 PetscSFType type; 576 577 PetscFunctionBegin; 578 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 579 if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2); 580 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 581 PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern); 582 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 583 PetscCallMPI(MPI_Comm_size(comm, &size)); 584 585 if (pattern == PETSCSF_PATTERN_ALLTOALL) { 586 type = PETSCSFALLTOALL; 587 PetscCall(PetscLayoutCreate(comm, &sf->map)); 588 PetscCall(PetscLayoutSetLocalSize(sf->map, size)); 589 PetscCall(PetscLayoutSetSize(sf->map, ((PetscInt)size) * size)); 590 PetscCall(PetscLayoutSetUp(sf->map)); 591 } else { 592 PetscCall(PetscLayoutGetLocalSize(map, &n)); 593 PetscCall(PetscLayoutGetSize(map, &N)); 594 res[0] = n; 595 res[1] = -n; 596 /* Check if n is the same over all ranks so that we can optimize it */ 597 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm)); 598 if (res[0] == -res[1]) { /* same n */ 599 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 600 } else { 601 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 602 } 603 PetscCall(PetscLayoutReference(map, &sf->map)); 604 } 605 PetscCall(PetscSFSetType(sf, type)); 606 607 sf->pattern = pattern; 608 sf->mine = NULL; /* Contiguous */ 609 610 /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 611 Also set other easy stuff.
612 */ 613 if (pattern == PETSCSF_PATTERN_ALLGATHER) { 614 sf->nleaves = N; 615 sf->nroots = n; 616 sf->nranks = size; 617 sf->minleaf = 0; 618 sf->maxleaf = N - 1; 619 } else if (pattern == PETSCSF_PATTERN_GATHER) { 620 sf->nleaves = rank ? 0 : N; 621 sf->nroots = n; 622 sf->nranks = rank ? 0 : size; 623 sf->minleaf = 0; 624 sf->maxleaf = rank ? -1 : N - 1; 625 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 626 sf->nleaves = size; 627 sf->nroots = size; 628 sf->nranks = size; 629 sf->minleaf = 0; 630 sf->maxleaf = size - 1; 631 } 632 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 633 sf->graphset = PETSC_TRUE; 634 PetscFunctionReturn(PETSC_SUCCESS); 635 } 636 637 /*@ 638 PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map 639 640 Collective 641 642 Input Parameter: 643 . sf - star forest to invert 644 645 Output Parameter: 646 . isf - inverse of `sf` 647 648 Level: advanced 649 650 Notes: 651 All roots must have degree 1. 652 653 The local space may be a permutation, but cannot be sparse. 654 655 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetGraph()` 656 @*/ 657 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf) 658 { 659 PetscMPIInt rank; 660 PetscInt i, nroots, nleaves, maxlocal, count, *newilocal; 661 const PetscInt *ilocal; 662 PetscSFNode *roots, *leaves; 663 664 PetscFunctionBegin; 665 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 666 PetscSFCheckGraphSet(sf, 1); 667 PetscAssertPointer(isf, 2); 668 669 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL)); 670 maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */ 671 672 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 673 PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves)); 674 for (i = 0; i < maxlocal; i++) { 675 leaves[i].rank = rank; 676 leaves[i].index = i; 677 } 678 for (i = 0; i < nroots; i++) { 679 roots[i].rank = -1; 680 roots[i].index = -1; 681 } 682 PetscCall(PetscSFReduceBegin(sf, MPIU_2INT, leaves, roots, MPI_REPLACE)); 683 PetscCall(PetscSFReduceEnd(sf, MPIU_2INT, leaves, roots, MPI_REPLACE)); 684 685 /* Check whether our leaves are sparse */ 686 for (i = 0, count = 0; i < nroots; i++) 687 if (roots[i].rank >= 0) count++; 688 if (count == nroots) newilocal = NULL; 689 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal)); 690 for (i = 0, count = 0; i < nroots; i++) { 691 if (roots[i].rank >= 0) { 692 newilocal[count] = i; 693 roots[count].rank = roots[i].rank; 694 roots[count].index = roots[i].index; 695 count++; 696 } 697 } 698 } 699 700 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf)); 701 PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES)); 702 PetscCall(PetscFree2(roots, leaves)); 703 PetscFunctionReturn(PETSC_SUCCESS); 704 } 705 706 /*@ 707 PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph 708 709 Collective 710 711 Input Parameters: 712 + sf - communication object to duplicate 713 - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`) 714 715 Output Parameter: 716 . 
newsf - new communication object 717 718 Level: beginner 719 720 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()` 721 @*/ 722 PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf) 723 { 724 PetscSFType type; 725 MPI_Datatype dtype = MPIU_SCALAR; 726 727 PetscFunctionBegin; 728 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 729 PetscValidLogicalCollectiveEnum(sf, opt, 2); 730 PetscAssertPointer(newsf, 3); 731 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf)); 732 PetscCall(PetscSFGetType(sf, &type)); 733 if (type) PetscCall(PetscSFSetType(*newsf, type)); 734 (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */ 735 if (opt == PETSCSF_DUPLICATE_GRAPH) { 736 PetscSFCheckGraphSet(sf, 1); 737 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 738 PetscInt nroots, nleaves; 739 const PetscInt *ilocal; 740 const PetscSFNode *iremote; 741 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 742 PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 743 } else { 744 PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern)); 745 } 746 } 747 /* Since oldtype is committed, so is newtype, according to MPI */ 748 if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype)); 749 (*newsf)->vscat.bs = sf->vscat.bs; 750 (*newsf)->vscat.unit = dtype; 751 (*newsf)->vscat.to_n = sf->vscat.to_n; 752 (*newsf)->vscat.from_n = sf->vscat.from_n; 753 /* Do not copy lsf. Build it on demand since it is rarely used */ 754 755 #if defined(PETSC_HAVE_DEVICE) 756 (*newsf)->backend = sf->backend; 757 (*newsf)->unknown_input_stream = sf->unknown_input_stream; 758 (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi; 759 (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi; 760 #endif 761 PetscTryTypeMethod(sf, Duplicate, opt, *newsf); 762 /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */ 763 PetscFunctionReturn(PETSC_SUCCESS); 764 } 765 766 /*@C 767 PetscSFGetGraph - Get the graph specifying a parallel star forest 768 769 Not Collective 770 771 Input Parameter: 772 . sf - star forest 773 774 Output Parameters: 775 + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 776 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 777 . ilocal - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage) 778 - iremote - remote locations of root vertices for each leaf on the current process 779 780 Level: intermediate 781 782 Notes: 783 We are not currently requiring that the graph is set, thus returning `nroots` = -1 if it has not been set yet 784 785 The returned `ilocal` and `iremote` might contain values in different order than the input ones in `PetscSFSetGraph()` 786 787 Fortran Notes: 788 The returned `iremote` array is a copy and must be deallocated after use. Consequently, if you 789 want to update the graph, you must call `PetscSFSetGraph()` after modifying the `iremote` array. 
790 791 To check for a `NULL` `ilocal` use 792 $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then 793 794 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()` 795 @*/ 796 PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote) 797 { 798 PetscFunctionBegin; 799 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 800 if (sf->ops->GetGraph) { 801 PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote)); 802 } else { 803 if (nroots) *nroots = sf->nroots; 804 if (nleaves) *nleaves = sf->nleaves; 805 if (ilocal) *ilocal = sf->mine; 806 if (iremote) *iremote = sf->remote; 807 } 808 PetscFunctionReturn(PETSC_SUCCESS); 809 } 810 811 /*@ 812 PetscSFGetLeafRange - Get the active leaf ranges 813 814 Not Collective 815 816 Input Parameter: 817 . sf - star forest 818 819 Output Parameters: 820 + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves. 821 - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves. 822 823 Level: developer 824 825 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 826 @*/ 827 PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf) 828 { 829 PetscFunctionBegin; 830 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 831 PetscSFCheckGraphSet(sf, 1); 832 if (minleaf) *minleaf = sf->minleaf; 833 if (maxleaf) *maxleaf = sf->maxleaf; 834 PetscFunctionReturn(PETSC_SUCCESS); 835 } 836 837 /*@ 838 PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database 839 840 Collective 841 842 Input Parameters: 843 + A - the star forest 844 . obj - Optional object that provides the prefix for the option names 845 - name - command line option 846 847 Level: intermediate 848 849 Note: 850 See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat` 851 852 .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()` 853 @*/ 854 PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[]) 855 { 856 PetscFunctionBegin; 857 PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1); 858 PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name)); 859 PetscFunctionReturn(PETSC_SUCCESS); 860 } 861 862 /*@ 863 PetscSFView - view a star forest 864 865 Collective 866 867 Input Parameters: 868 + sf - star forest 869 - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD` 870 871 Level: beginner 872 873 .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()` 874 @*/ 875 PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer) 876 { 877 PetscBool iascii; 878 PetscViewerFormat format; 879 880 PetscFunctionBegin; 881 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 882 if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer)); 883 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 884 PetscCheckSameComm(sf, 1, viewer, 2); 885 if (sf->graphset) PetscCall(PetscSFSetUp(sf)); 886 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 887 if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) { 888 PetscMPIInt rank; 889 PetscInt ii, i, j; 890 891 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer)); 892 PetscCall(PetscViewerASCIIPushTab(viewer)); 893 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 894 if (!sf->graphset) { 895 
PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n")); 896 PetscCall(PetscViewerASCIIPopTab(viewer)); 897 PetscFunctionReturn(PETSC_SUCCESS); 898 } 899 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 900 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 901 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n", rank, sf->nroots, sf->nleaves, sf->nranks)); 902 for (i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index)); 903 PetscCall(PetscViewerFlush(viewer)); 904 PetscCall(PetscViewerGetFormat(viewer, &format)); 905 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 906 PetscMPIInt *tmpranks, *perm; 907 PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm)); 908 PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks)); 909 for (i = 0; i < sf->nranks; i++) perm[i] = i; 910 PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm)); 911 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank)); 912 for (ii = 0; ii < sf->nranks; ii++) { 913 i = perm[ii]; 914 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i])); 915 for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j])); 916 } 917 PetscCall(PetscFree2(tmpranks, perm)); 918 } 919 PetscCall(PetscViewerFlush(viewer)); 920 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 921 } 922 PetscCall(PetscViewerASCIIPopTab(viewer)); 923 } 924 PetscTryTypeMethod(sf, View, viewer); 925 PetscFunctionReturn(PETSC_SUCCESS); 926 } 927 928 /*@C 929 PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process 930 931 Not Collective 932 933 Input Parameter: 934 . sf - star forest 935 936 Output Parameters: 937 + nranks - number of ranks referenced by local part 938 . ranks - [`nranks`] array of ranks 939 . roffset - [`nranks`+1] offset in `rmine`/`rremote` for each rank 940 . 
rmine - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote rank 941 - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote rank 942 943 Level: developer 944 945 .seealso: `PetscSF`, `PetscSFGetLeafRanks()` 946 @*/ 947 PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote) 948 { 949 PetscFunctionBegin; 950 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 951 PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 952 if (sf->ops->GetRootRanks) { 953 PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote); 954 } else { 955 /* The generic implementation */ 956 if (nranks) *nranks = sf->nranks; 957 if (ranks) *ranks = sf->ranks; 958 if (roffset) *roffset = sf->roffset; 959 if (rmine) *rmine = sf->rmine; 960 if (rremote) *rremote = sf->rremote; 961 } 962 PetscFunctionReturn(PETSC_SUCCESS); 963 } 964 965 /*@C 966 PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 967 968 Not Collective 969 970 Input Parameter: 971 . sf - star forest 972 973 Output Parameters: 974 + niranks - number of leaf ranks referencing roots on this process 975 . iranks - [`niranks`] array of ranks 976 . ioffset - [`niranks`+1] offset in `irootloc` for each rank 977 - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf rank 978 979 Level: developer 980 981 .seealso: `PetscSF`, `PetscSFGetRootRanks()` 982 @*/ 983 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc) 984 { 985 PetscFunctionBegin; 986 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 987 PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 988 if (sf->ops->GetLeafRanks) { 989 PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc); 990 } else { 991 PetscSFType type; 992 PetscCall(PetscSFGetType(sf, &type)); 993 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 994 } 995 PetscFunctionReturn(PETSC_SUCCESS); 996 } 997 998 static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list) 999 { 1000 PetscInt i; 1001 for (i = 0; i < n; i++) { 1002 if (needle == list[i]) return PETSC_TRUE; 1003 } 1004 return PETSC_FALSE; 1005 } 1006 1007 /*@C 1008 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations. 
1009 1010 Collective 1011 1012 Input Parameters: 1013 + sf - `PetscSF` to set up; `PetscSFSetGraph()` must have been called 1014 - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange) 1015 1016 Level: developer 1017 1018 .seealso: `PetscSF`, `PetscSFGetRootRanks()` 1019 @*/ 1020 PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup) 1021 { 1022 PetscHMapI table; 1023 PetscHashIter pos; 1024 PetscMPIInt size, groupsize, *groupranks; 1025 PetscInt *rcount, *ranks; 1026 PetscInt i, irank = -1, orank = -1; 1027 1028 PetscFunctionBegin; 1029 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1030 PetscSFCheckGraphSet(sf, 1); 1031 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 1032 PetscCall(PetscHMapICreateWithSize(10, &table)); 1033 for (i = 0; i < sf->nleaves; i++) { 1034 /* Log 1-based rank */ 1035 PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES)); 1036 } 1037 PetscCall(PetscHMapIGetSize(table, &sf->nranks)); 1038 PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote)); 1039 PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks)); 1040 PetscHashIterBegin(table, pos); 1041 for (i = 0; i < sf->nranks; i++) { 1042 PetscHashIterGetKey(table, pos, ranks[i]); 1043 PetscHashIterGetVal(table, pos, rcount[i]); 1044 PetscHashIterNext(table, pos); 1045 ranks[i]--; /* Convert back to 0-based */ 1046 } 1047 PetscCall(PetscHMapIDestroy(&table)); 1048 1049 /* We expect that dgroup is reliably "small" while nranks could be large */ 1050 { 1051 MPI_Group group = MPI_GROUP_NULL; 1052 PetscMPIInt *dgroupranks; 1053 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 1054 PetscCallMPI(MPI_Group_size(dgroup, &groupsize)); 1055 PetscCall(PetscMalloc1(groupsize, &dgroupranks)); 1056 PetscCall(PetscMalloc1(groupsize, &groupranks)); 1057 for (i = 0; i < groupsize; i++) dgroupranks[i] = i; 1058 if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks)); 1059 PetscCallMPI(MPI_Group_free(&group)); 1060 PetscCall(PetscFree(dgroupranks)); 1061 } 1062 1063 /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */ 1064 for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) { 1065 for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */ 1066 if (InList(ranks[i], groupsize, groupranks)) break; 1067 } 1068 for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */ 1069 if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break; 1070 } 1071 if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */ 1072 PetscInt tmprank, tmpcount; 1073 1074 tmprank = ranks[i]; 1075 tmpcount = rcount[i]; 1076 ranks[i] = ranks[sf->ndranks]; 1077 rcount[i] = rcount[sf->ndranks]; 1078 ranks[sf->ndranks] = tmprank; 1079 rcount[sf->ndranks] = tmpcount; 1080 sf->ndranks++; 1081 } 1082 } 1083 PetscCall(PetscFree(groupranks)); 1084 PetscCall(PetscSortIntWithArray(sf->ndranks, ranks, rcount)); 1085 if (rcount) PetscCall(PetscSortIntWithArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks)); 1086 sf->roffset[0] = 0; 1087 for (i = 0; i < sf->nranks; i++) { 1088 PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i)); 1089 sf->roffset[i + 1] = sf->roffset[i] + rcount[i]; 1090 rcount[i] = 0; 1091 } 1092 for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) { 1093 /* 
short circuit */ 1094 if (orank != sf->remote[i].rank) { 1095 /* Search for index of iremote[i].rank in sf->ranks */ 1096 PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->ndranks, sf->ranks, &irank)); 1097 if (irank < 0) { 1098 PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank)); 1099 if (irank >= 0) irank += sf->ndranks; 1100 } 1101 orank = sf->remote[i].rank; 1102 } 1103 PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %" PetscInt_FMT " in array", sf->remote[i].rank); 1104 sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i; 1105 sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index; 1106 rcount[irank]++; 1107 } 1108 PetscCall(PetscFree2(rcount, ranks)); 1109 PetscFunctionReturn(PETSC_SUCCESS); 1110 } 1111 1112 /*@C 1113 PetscSFGetGroups - gets incoming and outgoing process groups 1114 1115 Collective 1116 1117 Input Parameter: 1118 . sf - star forest 1119 1120 Output Parameters: 1121 + incoming - group of origin processes for incoming edges (leaves that reference my roots) 1122 - outgoing - group of destination processes for outgoing edges (roots that I reference) 1123 1124 Level: developer 1125 1126 .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()` 1127 @*/ 1128 PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing) 1129 { 1130 MPI_Group group = MPI_GROUP_NULL; 1131 1132 PetscFunctionBegin; 1133 PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups"); 1134 if (sf->ingroup == MPI_GROUP_NULL) { 1135 PetscInt i; 1136 const PetscInt *indegree; 1137 PetscMPIInt rank, *outranks, *inranks; 1138 PetscSFNode *remote; 1139 PetscSF bgcount; 1140 1141 /* Compute the number of incoming ranks */ 1142 PetscCall(PetscMalloc1(sf->nranks, &remote)); 1143 for (i = 0; i < sf->nranks; i++) { 1144 remote[i].rank = sf->ranks[i]; 1145 remote[i].index = 0; 1146 } 1147 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount)); 1148 PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER)); 1149 PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree)); 1150 PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree)); 1151 /* Enumerate the incoming ranks */ 1152 PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks)); 1153 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 1154 for (i = 0; i < sf->nranks; i++) outranks[i] = rank; 1155 PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks)); 1156 PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks)); 1157 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 1158 PetscCallMPI(MPI_Group_incl(group, indegree[0], inranks, &sf->ingroup)); 1159 PetscCallMPI(MPI_Group_free(&group)); 1160 PetscCall(PetscFree2(inranks, outranks)); 1161 PetscCall(PetscSFDestroy(&bgcount)); 1162 } 1163 *incoming = sf->ingroup; 1164 1165 if (sf->outgroup == MPI_GROUP_NULL) { 1166 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 1167 PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup)); 1168 PetscCallMPI(MPI_Group_free(&group)); 1169 } 1170 *outgoing = sf->outgroup; 1171 PetscFunctionReturn(PETSC_SUCCESS); 1172 } 1173 1174 /*@ 1175 PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks 1176 1177 Collective 1178 1179 Input Parameter: 1180 . 
sf - star forest 1181 1182 Output Parameter: 1183 . rsf - the star forest with a single root per process to perform communications 1184 1185 Level: developer 1186 1187 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()` 1188 @*/ 1189 PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf) 1190 { 1191 PetscFunctionBegin; 1192 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1193 PetscAssertPointer(rsf, 2); 1194 if (!sf->rankssf) { 1195 PetscSFNode *rremotes; 1196 const PetscMPIInt *ranks; 1197 PetscInt nranks; 1198 1199 PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL)); 1200 PetscCall(PetscMalloc1(nranks, &rremotes)); 1201 for (PetscInt i = 0; i < nranks; i++) { 1202 rremotes[i].rank = ranks[i]; 1203 rremotes[i].index = 0; 1204 } 1205 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf)); 1206 PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER)); 1207 } 1208 *rsf = sf->rankssf; 1209 PetscFunctionReturn(PETSC_SUCCESS); 1210 } 1211 1212 /*@ 1213 PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters 1214 1215 Collective 1216 1217 Input Parameter: 1218 . sf - star forest that may contain roots with 0 or with more than 1 vertex 1219 1220 Output Parameter: 1221 . multi - star forest with split roots, such that each root has degree exactly 1 1222 1223 Level: developer 1224 1225 Note: 1226 In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi 1227 directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming 1228 edge, it is a candidate for future optimization that might involve its removal. 1229 1230 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()` 1231 @*/ 1232 PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi) 1233 { 1234 PetscFunctionBegin; 1235 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1236 PetscAssertPointer(multi, 2); 1237 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? 
*/ 1238 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi)); 1239 *multi = sf->multi; 1240 sf->multi->multi = sf->multi; 1241 PetscFunctionReturn(PETSC_SUCCESS); 1242 } 1243 if (!sf->multi) { 1244 const PetscInt *indegree; 1245 PetscInt i, *inoffset, *outones, *outoffset, maxlocal; 1246 PetscSFNode *remote; 1247 maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */ 1248 PetscCall(PetscSFComputeDegreeBegin(sf, &indegree)); 1249 PetscCall(PetscSFComputeDegreeEnd(sf, &indegree)); 1250 PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset)); 1251 inoffset[0] = 0; 1252 for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i]; 1253 for (i = 0; i < maxlocal; i++) outones[i] = 1; 1254 PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM)); 1255 PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM)); 1256 for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1257 if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */ 1258 for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp"); 1259 } 1260 PetscCall(PetscMalloc1(sf->nleaves, &remote)); 1261 for (i = 0; i < sf->nleaves; i++) { 1262 remote[i].rank = sf->remote[i].rank; 1263 remote[i].index = outoffset[sf->mine ? sf->mine[i] : i]; 1264 } 1265 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi)); 1266 sf->multi->multi = sf->multi; 1267 PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER)); 1268 if (sf->rankorder) { /* Sort the ranks */ 1269 PetscMPIInt rank; 1270 PetscInt *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree; 1271 PetscSFNode *newremote; 1272 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 1273 for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]); 1274 PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset)); 1275 for (i = 0; i < maxlocal; i++) outranks[i] = rank; 1276 PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE)); 1277 PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE)); 1278 /* Sort the incoming ranks at each vertex, build the inverse map */ 1279 for (i = 0; i < sf->nroots; i++) { 1280 PetscInt j; 1281 for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j; 1282 PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset)); 1283 for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1284 } 1285 PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE)); 1286 PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE)); 1287 PetscCall(PetscMalloc1(sf->nleaves, &newremote)); 1288 for (i = 0; i < sf->nleaves; i++) { 1289 newremote[i].rank = sf->remote[i].rank; 1290 newremote[i].index = newoutoffset[sf->mine ? 
sf->mine[i] : i]; 1291 } 1292 PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER)); 1293 PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset)); 1294 } 1295 PetscCall(PetscFree3(inoffset, outones, outoffset)); 1296 } 1297 *multi = sf->multi; 1298 PetscFunctionReturn(PETSC_SUCCESS); 1299 } 1300 1301 /*@C 1302 PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices 1303 1304 Collective 1305 1306 Input Parameters: 1307 + sf - original star forest 1308 . nselected - number of selected roots on this process 1309 - selected - indices of the selected roots on this process 1310 1311 Output Parameter: 1312 . esf - new star forest 1313 1314 Level: advanced 1315 1316 Note: 1317 To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can 1318 be done by calling PetscSFGetGraph(). 1319 1320 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1321 @*/ 1322 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf) 1323 { 1324 PetscInt i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal; 1325 const PetscInt *ilocal; 1326 signed char *rootdata, *leafdata, *leafmem; 1327 const PetscSFNode *iremote; 1328 PetscSFNode *new_iremote; 1329 MPI_Comm comm; 1330 1331 PetscFunctionBegin; 1332 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1333 PetscSFCheckGraphSet(sf, 1); 1334 if (nselected) PetscAssertPointer(selected, 3); 1335 PetscAssertPointer(esf, 4); 1336 1337 PetscCall(PetscSFSetUp(sf)); 1338 PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0)); 1339 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1340 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 1341 1342 if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */ 1343 PetscBool dups; 1344 PetscCall(PetscCheckDupsInt(nselected, selected, &dups)); 1345 PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups"); 1346 for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots); 1347 } 1348 1349 if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf); 1350 else { 1351 /* A generic version of creating embedded sf */ 1352 PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf)); 1353 maxlocal = maxleaf - minleaf + 1; 1354 PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem)); 1355 leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf); 1356 /* Tag selected roots and bcast to leaves */ 1357 for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1; 1358 PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1359 PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1360 1361 /* Build esf with leaves that are still connected */ 1362 esf_nleaves = 0; 1363 for (i = 0; i < nleaves; i++) { 1364 j = ilocal ? ilocal[i] : i; 1365 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1366 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1367 */ 1368 esf_nleaves += (leafdata[j] ? 
1 : 0); 1369 } 1370 PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal)); 1371 PetscCall(PetscMalloc1(esf_nleaves, &new_iremote)); 1372 for (i = n = 0; i < nleaves; i++) { 1373 j = ilocal ? ilocal[i] : i; 1374 if (leafdata[j]) { 1375 new_ilocal[n] = j; 1376 new_iremote[n].rank = iremote[i].rank; 1377 new_iremote[n].index = iremote[i].index; 1378 ++n; 1379 } 1380 } 1381 PetscCall(PetscSFCreate(comm, esf)); 1382 PetscCall(PetscSFSetFromOptions(*esf)); 1383 PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1384 PetscCall(PetscFree2(rootdata, leafmem)); 1385 } 1386 PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0)); 1387 PetscFunctionReturn(PETSC_SUCCESS); 1388 } 1389 1390 /*@C 1391 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices 1392 1393 Collective 1394 1395 Input Parameters: 1396 + sf - original star forest 1397 . nselected - number of selected leaves on this process 1398 - selected - indices of the selected leaves on this process 1399 1400 Output Parameter: 1401 . newsf - new star forest 1402 1403 Level: advanced 1404 1405 .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1406 @*/ 1407 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf) 1408 { 1409 const PetscSFNode *iremote; 1410 PetscSFNode *new_iremote; 1411 const PetscInt *ilocal; 1412 PetscInt i, nroots, *leaves, *new_ilocal; 1413 MPI_Comm comm; 1414 1415 PetscFunctionBegin; 1416 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1417 PetscSFCheckGraphSet(sf, 1); 1418 if (nselected) PetscAssertPointer(selected, 3); 1419 PetscAssertPointer(newsf, 4); 1420 1421 /* Uniq selected[] and put results in leaves[] */ 1422 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1423 PetscCall(PetscMalloc1(nselected, &leaves)); 1424 PetscCall(PetscArraycpy(leaves, selected, nselected)); 1425 PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves)); 1426 PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves); 1427 1428 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1429 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf); 1430 else { 1431 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote)); 1432 PetscCall(PetscMalloc1(nselected, &new_ilocal)); 1433 PetscCall(PetscMalloc1(nselected, &new_iremote)); 1434 for (i = 0; i < nselected; ++i) { 1435 const PetscInt l = leaves[i]; 1436 new_ilocal[i] = ilocal ? ilocal[l] : l; 1437 new_iremote[i].rank = iremote[l].rank; 1438 new_iremote[i].index = iremote[l].index; 1439 } 1440 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf)); 1441 PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1442 } 1443 PetscCall(PetscFree(leaves)); 1444 PetscFunctionReturn(PETSC_SUCCESS); 1445 } 1446 1447 /*@C 1448 PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()` 1449 1450 Collective 1451 1452 Input Parameters: 1453 + sf - star forest on which to communicate 1454 . 
unit - data type associated with each node 1455 . rootdata - buffer to broadcast 1456 - op - operation to use for reduction 1457 1458 Output Parameter: 1459 . leafdata - buffer to be reduced with values from each leaf's respective root 1460 1461 Level: intermediate 1462 1463 Note: 1464 When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1465 are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should 1466 use `PetscSFBcastWithMemTypeBegin()` instead. 1467 1468 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()` 1469 @*/ 1470 PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1471 { 1472 PetscMemType rootmtype, leafmtype; 1473 1474 PetscFunctionBegin; 1475 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1476 PetscCall(PetscSFSetUp(sf)); 1477 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1478 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1479 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1480 PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 1481 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1482 PetscFunctionReturn(PETSC_SUCCESS); 1483 } 1484 1485 /*@C 1486 PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call 1487 to `PetscSFBcastEnd()` 1488 1489 Collective 1490 1491 Input Parameters: 1492 + sf - star forest on which to communicate 1493 . unit - data type associated with each node 1494 . rootmtype - memory type of rootdata 1495 . rootdata - buffer to broadcast 1496 . leafmtype - memory type of leafdata 1497 - op - operation to use for reduction 1498 1499 Output Parameter: 1500 . leafdata - buffer to be reduced with values from each leaf's respective root 1501 1502 Level: intermediate 1503 1504 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()` 1505 @*/ 1506 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) 1507 { 1508 PetscFunctionBegin; 1509 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1510 PetscCall(PetscSFSetUp(sf)); 1511 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1512 PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 1513 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1514 PetscFunctionReturn(PETSC_SUCCESS); 1515 } 1516 1517 /*@C 1518 PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()` 1519 1520 Collective 1521 1522 Input Parameters: 1523 + sf - star forest 1524 . unit - data type 1525 . rootdata - buffer to broadcast 1526 - op - operation to use for reduction 1527 1528 Output Parameter: 1529 .
leafdata - buffer to be reduced with values from each leaf's respective root 1530 1531 Level: intermediate 1532 1533 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()` 1534 @*/ 1535 PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1536 { 1537 PetscFunctionBegin; 1538 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1539 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1540 PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op); 1541 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1542 PetscFunctionReturn(PETSC_SUCCESS); 1543 } 1544 1545 /*@C 1546 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()` 1547 1548 Collective 1549 1550 Input Parameters: 1551 + sf - star forest 1552 . unit - data type 1553 . leafdata - values to reduce 1554 - op - reduction operation 1555 1556 Output Parameter: 1557 . rootdata - result of reduction of values from all leaves of each root 1558 1559 Level: intermediate 1560 1561 Note: 1562 When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1563 are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should 1564 use `PetscSFReduceWithMemTypeBegin()` instead. 1565 1566 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()` 1567 @*/ 1568 PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1569 { 1570 PetscMemType rootmtype, leafmtype; 1571 1572 PetscFunctionBegin; 1573 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1574 PetscCall(PetscSFSetUp(sf)); 1575 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1576 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1577 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1578 PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1579 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1580 PetscFunctionReturn(PETSC_SUCCESS); 1581 } 1582 1583 /*@C 1584 PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()` 1585 1586 Collective 1587 1588 Input Parameters: 1589 + sf - star forest 1590 . unit - data type 1591 . leafmtype - memory type of leafdata 1592 . leafdata - values to reduce 1593 . rootmtype - memory type of rootdata 1594 - op - reduction operation 1595 1596 Output Parameter: 1597 .
rootdata - result of reduction of values from all leaves of each root 1598 1599 Level: intermediate 1600 1601 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()` 1602 @*/ 1603 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) 1604 { 1605 PetscFunctionBegin; 1606 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1607 PetscCall(PetscSFSetUp(sf)); 1608 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1609 PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1610 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1611 PetscFunctionReturn(PETSC_SUCCESS); 1612 } 1613 1614 /*@C 1615 PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()` 1616 1617 Collective 1618 1619 Input Parameters: 1620 + sf - star forest 1621 . unit - data type 1622 . leafdata - values to reduce 1623 - op - reduction operation 1624 1625 Output Parameter: 1626 . rootdata - result of reduction of values from all leaves of each root 1627 1628 Level: intermediate 1629 1630 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()` 1631 @*/ 1632 PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1633 { 1634 PetscFunctionBegin; 1635 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1636 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1637 PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op); 1638 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1639 PetscFunctionReturn(PETSC_SUCCESS); 1640 } 1641 1642 /*@C 1643 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, 1644 to be completed with `PetscSFFetchAndOpEnd()` 1645 1646 Collective 1647 1648 Input Parameters: 1649 + sf - star forest 1650 . unit - data type 1651 . leafdata - leaf values to use in reduction 1652 - op - operation to use for reduction 1653 1654 Output Parameters: 1655 + rootdata - root values to be updated, input state is seen by first process to perform an update 1656 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1657 1658 Level: advanced 1659 1660 Note: 1661 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1662 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1663 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1664 integers. 
1665 1666 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()` 1667 @*/ 1668 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1669 { 1670 PetscMemType rootmtype, leafmtype, leafupdatemtype; 1671 1672 PetscFunctionBegin; 1673 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1674 PetscCall(PetscSFSetUp(sf)); 1675 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1676 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1677 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1678 PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype)); 1679 PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1680 PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 1681 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1682 PetscFunctionReturn(PETSC_SUCCESS); 1683 } 1684 1685 /*@C 1686 PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by 1687 applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()` 1688 1689 Collective 1690 1691 Input Parameters: 1692 + sf - star forest 1693 . unit - data type 1694 . rootmtype - memory type of rootdata 1695 . leafmtype - memory type of leafdata 1696 . leafdata - leaf values to use in reduction 1697 . leafupdatemtype - memory type of leafupdate 1698 - op - operation to use for reduction 1699 1700 Output Parameters: 1701 + rootdata - root values to be updated, input state is seen by first process to perform an update 1702 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1703 1704 Level: advanced 1705 1706 Note: 1707 See `PetscSFFetchAndOpBegin()` for more details. 1708 1709 .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()` 1710 @*/ 1711 PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op) 1712 { 1713 PetscFunctionBegin; 1714 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1715 PetscCall(PetscSFSetUp(sf)); 1716 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1717 PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1718 PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 1719 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1720 PetscFunctionReturn(PETSC_SUCCESS); 1721 } 1722 1723 /*@C 1724 PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()` 1725 to fetch values from roots and update atomically by applying operation using my leaf value 1726 1727 Collective 1728 1729 Input Parameters: 1730 + sf - star forest 1731 . unit - data type 1732 . 
leafdata - leaf values to use in reduction 1733 - op - operation to use for reduction 1734 1735 Output Parameters: 1736 + rootdata - root values to be updated, input state is seen by first process to perform an update 1737 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1738 1739 Level: advanced 1740 1741 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()` 1742 @*/ 1743 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1744 { 1745 PetscFunctionBegin; 1746 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1747 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0)); 1748 PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op); 1749 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0)); 1750 PetscFunctionReturn(PETSC_SUCCESS); 1751 } 1752 1753 /*@C 1754 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()` 1755 1756 Collective 1757 1758 Input Parameter: 1759 . sf - star forest 1760 1761 Output Parameter: 1762 . degree - degree of each root vertex 1763 1764 Level: advanced 1765 1766 Note: 1767 The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it. 1768 1769 .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()` 1770 @*/ 1771 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt **degree) 1772 { 1773 PetscFunctionBegin; 1774 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1775 PetscSFCheckGraphSet(sf, 1); 1776 PetscAssertPointer(degree, 2); 1777 if (!sf->degreeknown) { 1778 PetscInt i, nroots = sf->nroots, maxlocal; 1779 PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1780 maxlocal = sf->maxleaf - sf->minleaf + 1; 1781 PetscCall(PetscMalloc1(nroots, &sf->degree)); 1782 PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1783 for (i = 0; i < nroots; i++) sf->degree[i] = 0; 1784 for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1; 1785 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 1786 } 1787 *degree = NULL; 1788 PetscFunctionReturn(PETSC_SUCCESS); 1789 } 1790 1791 /*@C 1792 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()` 1793 1794 Collective 1795 1796 Input Parameter: 1797 . sf - star forest 1798 1799 Output Parameter: 1800 . degree - degree of each root vertex 1801 1802 Level: developer 1803 1804 Note: 1805 The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it. 
1806 1807 .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()` 1808 @*/ 1809 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree) 1810 { 1811 PetscFunctionBegin; 1812 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1813 PetscSFCheckGraphSet(sf, 1); 1814 PetscAssertPointer(degree, 2); 1815 if (!sf->degreeknown) { 1816 PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 1817 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 1818 PetscCall(PetscFree(sf->degreetmp)); 1819 sf->degreeknown = PETSC_TRUE; 1820 } 1821 *degree = sf->degree; 1822 PetscFunctionReturn(PETSC_SUCCESS); 1823 } 1824 1825 /*@C 1826 PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`). 1827 Each multi-root is assigned index of the corresponding original root. 1828 1829 Collective 1830 1831 Input Parameters: 1832 + sf - star forest 1833 - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()` 1834 1835 Output Parameters: 1836 + nMultiRoots - (optional) number of multi-roots (roots of multi-`PetscSF`) 1837 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots` 1838 1839 Level: developer 1840 1841 Note: 1842 The returned array `multiRootsOrigNumbering` is newly allocated and should be destroyed with `PetscFree()` when no longer needed. 1843 1844 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()` 1845 @*/ 1846 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1847 { 1848 PetscSF msf; 1849 PetscInt i, j, k, nroots, nmroots; 1850 1851 PetscFunctionBegin; 1852 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1853 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL)); 1854 if (nroots) PetscAssertPointer(degree, 2); 1855 if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3); 1856 PetscAssertPointer(multiRootsOrigNumbering, 4); 1857 PetscCall(PetscSFGetMultiSF(sf, &msf)); 1858 PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL)); 1859 PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering)); 1860 for (i = 0, j = 0, k = 0; i < nroots; i++) { 1861 if (!degree[i]) continue; 1862 for (j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i; 1863 } 1864 PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail"); 1865 if (nMultiRoots) *nMultiRoots = nmroots; 1866 PetscFunctionReturn(PETSC_SUCCESS); 1867 } 1868 1869 /*@C 1870 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()` 1871 1872 Collective 1873 1874 Input Parameters: 1875 + sf - star forest 1876 . unit - data type 1877 - leafdata - leaf data to gather to roots 1878 1879 Output Parameter: 1880 . 
multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1881 1882 Level: intermediate 1883 1884 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()` 1885 @*/ 1886 PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) 1887 { 1888 PetscSF multi = NULL; 1889 1890 PetscFunctionBegin; 1891 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1892 PetscCall(PetscSFSetUp(sf)); 1893 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1894 PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 1895 PetscFunctionReturn(PETSC_SUCCESS); 1896 } 1897 1898 /*@C 1899 PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()` 1900 1901 Collective 1902 1903 Input Parameters: 1904 + sf - star forest 1905 . unit - data type 1906 - leafdata - leaf data to gather to roots 1907 1908 Output Parameter: 1909 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1910 1911 Level: intermediate 1912 1913 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()` 1914 @*/ 1915 PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) 1916 { 1917 PetscSF multi = NULL; 1918 1919 PetscFunctionBegin; 1920 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1921 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1922 PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 1923 PetscFunctionReturn(PETSC_SUCCESS); 1924 } 1925 1926 /*@C 1927 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()` 1928 1929 Collective 1930 1931 Input Parameters: 1932 + sf - star forest 1933 . unit - data type 1934 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1935 1936 Output Parameter: 1937 . leafdata - leaf data to be updated with personal data from each respective root 1938 1939 Level: intermediate 1940 1941 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()` 1942 @*/ 1943 PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) 1944 { 1945 PetscSF multi = NULL; 1946 1947 PetscFunctionBegin; 1948 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1949 PetscCall(PetscSFSetUp(sf)); 1950 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1951 PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE)); 1952 PetscFunctionReturn(PETSC_SUCCESS); 1953 } 1954 1955 /*@C 1956 PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()` 1957 1958 Collective 1959 1960 Input Parameters: 1961 + sf - star forest 1962 . unit - data type 1963 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1964 1965 Output Parameter: 1966 .
leafdata - leaf data to be updated with personal data from each respective root 1967 1968 Level: intermediate 1969 1970 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()` 1971 @*/ 1972 PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) 1973 { 1974 PetscSF multi = NULL; 1975 1976 PetscFunctionBegin; 1977 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1978 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1979 PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE)); 1980 PetscFunctionReturn(PETSC_SUCCESS); 1981 } 1982 1983 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 1984 { 1985 PetscInt i, n, nleaves; 1986 const PetscInt *ilocal = NULL; 1987 PetscHSetI seen; 1988 1989 PetscFunctionBegin; 1990 if (PetscDefined(USE_DEBUG)) { 1991 PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL)); 1992 PetscCall(PetscHSetICreate(&seen)); 1993 for (i = 0; i < nleaves; i++) { 1994 const PetscInt leaf = ilocal ? ilocal[i] : i; 1995 PetscCall(PetscHSetIAdd(seen, leaf)); 1996 } 1997 PetscCall(PetscHSetIGetSize(seen, &n)); 1998 PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique"); 1999 PetscCall(PetscHSetIDestroy(&seen)); 2000 } 2001 PetscFunctionReturn(PETSC_SUCCESS); 2002 } 2003 2004 /*@ 2005 PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view 2006 2007 Input Parameters: 2008 + sfA - The first `PetscSF` 2009 - sfB - The second `PetscSF` 2010 2011 Output Parameter: 2012 . sfBA - The composite `PetscSF` 2013 2014 Level: developer 2015 2016 Notes: 2017 Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star 2018 forests, i.e. the same leaf is not connected with different roots. 2019 2020 `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds 2021 a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected 2022 nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a 2023 `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfB`, on connected nodes.
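  Example:
  A schematic single-process illustration, written as leaf <- root edges: if `sfA` connects leaf x to root r, and `sfB` connects
  leaf y to root x (that is, `sfB`'s root index x coincides with `sfA`'s leaf index x), then the composite `sfBA` connects leaf y directly to root r\:
.vb
  sfA :  x <- r
  sfB :  y <- x
  sfBA:  y <- r
.ve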
2024 2025 .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()` 2026 @*/ 2027 PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA) 2028 { 2029 const PetscSFNode *remotePointsA, *remotePointsB; 2030 PetscSFNode *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB; 2031 const PetscInt *localPointsA, *localPointsB; 2032 PetscInt *localPointsBA; 2033 PetscInt i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA; 2034 PetscBool denseB; 2035 2036 PetscFunctionBegin; 2037 PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1); 2038 PetscSFCheckGraphSet(sfA, 1); 2039 PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2); 2040 PetscSFCheckGraphSet(sfB, 2); 2041 PetscCheckSameComm(sfA, 1, sfB, 2); 2042 PetscAssertPointer(sfBA, 3); 2043 PetscCall(PetscSFCheckLeavesUnique_Private(sfA)); 2044 PetscCall(PetscSFCheckLeavesUnique_Private(sfB)); 2045 2046 PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA)); 2047 PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB)); 2048 /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size 2049 numRootsB; otherwise, garbage will be broadcast. 2050 Example (comm size = 1): 2051 sfA: 0 <- (0, 0) 2052 sfB: 100 <- (0, 0) 2053 101 <- (0, 1) 2054 Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target 2055 of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would 2056 receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on 2057 remotePointsA; if not recast, point 101 would receive a garbage value. */ 2058 PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA)); 2059 for (i = 0; i < numRootsB; i++) { 2060 reorderedRemotePointsA[i].rank = -1; 2061 reorderedRemotePointsA[i].index = -1; 2062 } 2063 for (i = 0; i < numLeavesA; i++) { 2064 PetscInt localp = localPointsA ? localPointsA[i] : i; 2065 2066 if (localp >= numRootsB) continue; 2067 reorderedRemotePointsA[localp] = remotePointsA[i]; 2068 } 2069 remotePointsA = reorderedRemotePointsA; 2070 PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf)); 2071 PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB)); 2072 for (i = 0; i < maxleaf - minleaf + 1; i++) { 2073 leafdataB[i].rank = -1; 2074 leafdataB[i].index = -1; 2075 } 2076 PetscCall(PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE)); 2077 PetscCall(PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE)); 2078 PetscCall(PetscFree(reorderedRemotePointsA)); 2079 2080 denseB = (PetscBool)!localPointsB; 2081 for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) { 2082 if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE; 2083 else numLeavesBA++; 2084 } 2085 if (denseB) { 2086 localPointsBA = NULL; 2087 remotePointsBA = leafdataB; 2088 } else { 2089 PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA)); 2090 PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA)); 2091 for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) { 2092 const PetscInt l = localPointsB ?
localPointsB[i] : i; 2093 2094 if (leafdataB[l - minleaf].rank == -1) continue; 2095 remotePointsBA[numLeavesBA] = leafdataB[l - minleaf]; 2096 localPointsBA[numLeavesBA] = l; 2097 numLeavesBA++; 2098 } 2099 PetscCall(PetscFree(leafdataB)); 2100 } 2101 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA)); 2102 PetscCall(PetscSFSetFromOptions(*sfBA)); 2103 PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER)); 2104 PetscFunctionReturn(PETSC_SUCCESS); 2105 } 2106 2107 /*@ 2108 PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one 2109 2110 Input Parameters: 2111 + sfA - The first `PetscSF` 2112 - sfB - The second `PetscSF` 2113 2114 Output Parameter: 2115 . sfBA - The composite `PetscSF`. 2116 2117 Level: developer 2118 2119 Notes: 2120 Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star 2121 forests, i.e. the same leaf is not connected with different roots. Moreover, all roots of the 2122 second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected. 2123 2124 `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds 2125 a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected 2126 roots are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` 2127 on `sfA`, then 2128 a `PetscSFReduceBegin()`/`PetscSFReduceEnd()` on `sfB`, on connected roots. 2129 2130 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()` 2131 @*/ 2132 PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA) 2133 { 2134 const PetscSFNode *remotePointsA, *remotePointsB; 2135 PetscSFNode *remotePointsBA; 2136 const PetscInt *localPointsA, *localPointsB; 2137 PetscSFNode *reorderedRemotePointsA = NULL; 2138 PetscInt i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA; 2139 MPI_Op op; 2140 #if defined(PETSC_USE_64BIT_INDICES) 2141 PetscBool iswin; 2142 #endif 2143 2144 PetscFunctionBegin; 2145 PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1); 2146 PetscSFCheckGraphSet(sfA, 1); 2147 PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2); 2148 PetscSFCheckGraphSet(sfB, 2); 2149 PetscCheckSameComm(sfA, 1, sfB, 2); 2150 PetscAssertPointer(sfBA, 3); 2151 PetscCall(PetscSFCheckLeavesUnique_Private(sfA)); 2152 PetscCall(PetscSFCheckLeavesUnique_Private(sfB)); 2153 2154 PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA)); 2155 PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB)); 2156 2157 /* TODO: Check roots of sfB have degree of 1 */ 2158 /* Once we implement it, we can replace the MPI_MAXLOC 2159 with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect. 2160 We use MPI_MAXLOC only to have a deterministic output from this routine if 2161 the root condition is not met.
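     (With MPI_REPLACE, the result at a root reached by several leaves of sfB would depend on the order in which the
     contributions happen to be applied; MPI_MAXLOC always keeps the pair with the largest rank, breaking ties by the
     smaller index, so the output stays reproducible even when the degree-1 condition is violated.)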
2162 */ 2163 op = MPI_MAXLOC; 2164 #if defined(PETSC_USE_64BIT_INDICES) 2165 /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */ 2166 PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin)); 2167 if (iswin) op = MPI_REPLACE; 2168 #endif 2169 2170 PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf)); 2171 PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA)); 2172 for (i = 0; i < maxleaf - minleaf + 1; i++) { 2173 reorderedRemotePointsA[i].rank = -1; 2174 reorderedRemotePointsA[i].index = -1; 2175 } 2176 if (localPointsA) { 2177 for (i = 0; i < numLeavesA; i++) { 2178 if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; 2179 reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i]; 2180 } 2181 } else { 2182 for (i = 0; i < numLeavesA; i++) { 2183 if (i > maxleaf || i < minleaf) continue; 2184 reorderedRemotePointsA[i - minleaf] = remotePointsA[i]; 2185 } 2186 } 2187 2188 PetscCall(PetscMalloc1(numRootsB, &localPointsBA)); 2189 PetscCall(PetscMalloc1(numRootsB, &remotePointsBA)); 2190 for (i = 0; i < numRootsB; i++) { 2191 remotePointsBA[i].rank = -1; 2192 remotePointsBA[i].index = -1; 2193 } 2194 2195 PetscCall(PetscSFReduceBegin(sfB, MPIU_2INT, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op)); 2196 PetscCall(PetscSFReduceEnd(sfB, MPIU_2INT, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op)); 2197 PetscCall(PetscFree(reorderedRemotePointsA)); 2198 for (i = 0, numLeavesBA = 0; i < numRootsB; i++) { 2199 if (remotePointsBA[i].rank == -1) continue; 2200 remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank; 2201 remotePointsBA[numLeavesBA].index = remotePointsBA[i].index; 2202 localPointsBA[numLeavesBA] = i; 2203 numLeavesBA++; 2204 } 2205 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA)); 2206 PetscCall(PetscSFSetFromOptions(*sfBA)); 2207 PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER)); 2208 PetscFunctionReturn(PETSC_SUCCESS); 2209 } 2210 2211 /* 2212 PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF` 2213 2214 Input Parameter: 2215 . sf - The global `PetscSF` 2216 2217 Output Parameter: 2218 . 
out - The local `PetscSF` 2219 2220 .seealso: `PetscSF`, `PetscSFCreate()` 2221 */ 2222 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out) 2223 { 2224 MPI_Comm comm; 2225 PetscMPIInt myrank; 2226 const PetscInt *ilocal; 2227 const PetscSFNode *iremote; 2228 PetscInt i, j, nroots, nleaves, lnleaves, *lilocal; 2229 PetscSFNode *liremote; 2230 PetscSF lsf; 2231 2232 PetscFunctionBegin; 2233 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2234 if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out); 2235 else { 2236 /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */ 2237 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 2238 PetscCallMPI(MPI_Comm_rank(comm, &myrank)); 2239 2240 /* Find out local edges and build a local SF */ 2241 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 2242 for (i = lnleaves = 0; i < nleaves; i++) { 2243 if (iremote[i].rank == (PetscInt)myrank) lnleaves++; 2244 } 2245 PetscCall(PetscMalloc1(lnleaves, &lilocal)); 2246 PetscCall(PetscMalloc1(lnleaves, &liremote)); 2247 2248 for (i = j = 0; i < nleaves; i++) { 2249 if (iremote[i].rank == (PetscInt)myrank) { 2250 lilocal[j] = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */ 2251 liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */ 2252 liremote[j].index = iremote[i].index; 2253 j++; 2254 } 2255 } 2256 PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf)); 2257 PetscCall(PetscSFSetFromOptions(lsf)); 2258 PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER)); 2259 PetscCall(PetscSFSetUp(lsf)); 2260 *out = lsf; 2261 } 2262 PetscFunctionReturn(PETSC_SUCCESS); 2263 } 2264 2265 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */ 2266 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata) 2267 { 2268 PetscMemType rootmtype, leafmtype; 2269 2270 PetscFunctionBegin; 2271 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2272 PetscCall(PetscSFSetUp(sf)); 2273 PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 2274 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 2275 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 2276 PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata); 2277 PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 2278 PetscFunctionReturn(PETSC_SUCCESS); 2279 } 2280 2281 /*@ 2282 PetscSFConcatenate - concatenate multiple `PetscSF` into one 2283 2284 Input Parameters: 2285 + comm - the communicator 2286 . nsfs - the number of input `PetscSF` 2287 . sfs - the array of input `PetscSF` 2288 . rootMode - the root mode specifying how roots are handled 2289 - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage 2290 2291 Output Parameter: 2292 . newsf - The resulting `PetscSF` 2293 2294 Level: advanced 2295 2296 Notes: 2297 The communicator of all `PetscSF`s in `sfs` must be comm. 2298 2299 Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order. 2300 2301 The offsets in `leafOffsets` are added to the original leaf indices. 2302 2303 If all input SFs use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well. 2304 In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`. 
2305 2306 If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s. 2307 In this case, the user is responsible for providing correct offsets so that the resulting leaves are unique (otherwise an error occurs). 2308 2309 All root modes retain the essential connectivity condition. 2310 If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`. 2311 Parameter `rootMode` controls how the input root spaces are combined. 2312 For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode) 2313 and is also the same in the output `PetscSF`. 2314 For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined. 2315 `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally; 2316 roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input `PetscSF` index and original local index, and renumbered contiguously. 2317 `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally; 2318 roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously; 2319 the original root ranks are ignored. 2320 For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, 2321 the output `PetscSF`'s root layout is such that the local number of roots is the sum of the input `PetscSF`'s local numbers of roots on each rank, 2322 in order to preserve load balancing. 2323 However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different ranks. 2324 2325 Example: 2326 We can use src/vec/is/sf/tests/ex18.c to compare the root modes.
By running 2327 .vb 2328 make -C $PETSC_DIR/src/vec/is/sf/tests ex18 2329 for m in {local,global,shared}; do 2330 mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view 2331 done 2332 .ve 2333 we generate two identical `PetscSF`s sf_0 and sf_1, 2334 .vb 2335 PetscSF Object: sf_0 2 MPI processes 2336 type: basic 2337 rank #leaves #roots 2338 [ 0] 4 2 2339 [ 1] 4 2 2340 leaves roots roots in global numbering 2341 ( 0, 0) <- ( 0, 0) = 0 2342 ( 0, 1) <- ( 0, 1) = 1 2343 ( 0, 2) <- ( 1, 0) = 2 2344 ( 0, 3) <- ( 1, 1) = 3 2345 ( 1, 0) <- ( 0, 0) = 0 2346 ( 1, 1) <- ( 0, 1) = 1 2347 ( 1, 2) <- ( 1, 0) = 2 2348 ( 1, 3) <- ( 1, 1) = 3 2349 .ve 2350 and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\: 2351 .vb 2352 rootMode = local: 2353 PetscSF Object: result_sf 2 MPI processes 2354 type: basic 2355 rank #leaves #roots 2356 [ 0] 8 4 2357 [ 1] 8 4 2358 leaves roots roots in global numbering 2359 ( 0, 0) <- ( 0, 0) = 0 2360 ( 0, 1) <- ( 0, 1) = 1 2361 ( 0, 2) <- ( 1, 0) = 4 2362 ( 0, 3) <- ( 1, 1) = 5 2363 ( 0, 4) <- ( 0, 2) = 2 2364 ( 0, 5) <- ( 0, 3) = 3 2365 ( 0, 6) <- ( 1, 2) = 6 2366 ( 0, 7) <- ( 1, 3) = 7 2367 ( 1, 0) <- ( 0, 0) = 0 2368 ( 1, 1) <- ( 0, 1) = 1 2369 ( 1, 2) <- ( 1, 0) = 4 2370 ( 1, 3) <- ( 1, 1) = 5 2371 ( 1, 4) <- ( 0, 2) = 2 2372 ( 1, 5) <- ( 0, 3) = 3 2373 ( 1, 6) <- ( 1, 2) = 6 2374 ( 1, 7) <- ( 1, 3) = 7 2375 2376 rootMode = global: 2377 PetscSF Object: result_sf 2 MPI processes 2378 type: basic 2379 rank #leaves #roots 2380 [ 0] 8 4 2381 [ 1] 8 4 2382 leaves roots roots in global numbering 2383 ( 0, 0) <- ( 0, 0) = 0 2384 ( 0, 1) <- ( 0, 1) = 1 2385 ( 0, 2) <- ( 0, 2) = 2 2386 ( 0, 3) <- ( 0, 3) = 3 2387 ( 0, 4) <- ( 1, 0) = 4 2388 ( 0, 5) <- ( 1, 1) = 5 2389 ( 0, 6) <- ( 1, 2) = 6 2390 ( 0, 7) <- ( 1, 3) = 7 2391 ( 1, 0) <- ( 0, 0) = 0 2392 ( 1, 1) <- ( 0, 1) = 1 2393 ( 1, 2) <- ( 0, 2) = 2 2394 ( 1, 3) <- ( 0, 3) = 3 2395 ( 1, 4) <- ( 1, 0) = 4 2396 ( 1, 5) <- ( 1, 1) = 5 2397 ( 1, 6) <- ( 1, 2) = 6 2398 ( 1, 7) <- ( 1, 3) = 7 2399 2400 rootMode = shared: 2401 PetscSF Object: result_sf 2 MPI processes 2402 type: basic 2403 rank #leaves #roots 2404 [ 0] 8 2 2405 [ 1] 8 2 2406 leaves roots roots in global numbering 2407 ( 0, 0) <- ( 0, 0) = 0 2408 ( 0, 1) <- ( 0, 1) = 1 2409 ( 0, 2) <- ( 1, 0) = 2 2410 ( 0, 3) <- ( 1, 1) = 3 2411 ( 0, 4) <- ( 0, 0) = 0 2412 ( 0, 5) <- ( 0, 1) = 1 2413 ( 0, 6) <- ( 1, 0) = 2 2414 ( 0, 7) <- ( 1, 1) = 3 2415 ( 1, 0) <- ( 0, 0) = 0 2416 ( 1, 1) <- ( 0, 1) = 1 2417 ( 1, 2) <- ( 1, 0) = 2 2418 ( 1, 3) <- ( 1, 1) = 3 2419 ( 1, 4) <- ( 0, 0) = 0 2420 ( 1, 5) <- ( 0, 1) = 1 2421 ( 1, 6) <- ( 1, 0) = 2 2422 ( 1, 7) <- ( 1, 1) = 3 2423 .ve 2424 2425 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode` 2426 @*/ 2427 PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf) 2428 { 2429 PetscInt i, s, nLeaves, nRoots; 2430 PetscInt *leafArrayOffsets; 2431 PetscInt *ilocal_new; 2432 PetscSFNode *iremote_new; 2433 PetscBool all_ilocal_null = PETSC_FALSE; 2434 PetscLayout glayout = NULL; 2435 PetscInt *gremote = NULL; 2436 PetscMPIInt rank, size; 2437 2438 PetscFunctionBegin; 2439 if (PetscDefined(USE_DEBUG)) { 2440 PetscSF dummy; /* just to have a PetscObject on comm for input validation */ 2441 2442 PetscCall(PetscSFCreate(comm, &dummy)); 2443 PetscValidLogicalCollectiveInt(dummy, nsfs, 2); 2444 
PetscAssertPointer(sfs, 3); 2445 for (i = 0; i < nsfs; i++) { 2446 PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3); 2447 PetscCheckSameComm(dummy, 1, sfs[i], 3); 2448 } 2449 PetscValidLogicalCollectiveEnum(dummy, rootMode, 4); 2450 if (leafOffsets) PetscAssertPointer(leafOffsets, 5); 2451 PetscAssertPointer(newsf, 6); 2452 PetscCall(PetscSFDestroy(&dummy)); 2453 } 2454 if (!nsfs) { 2455 PetscCall(PetscSFCreate(comm, newsf)); 2456 PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 2457 PetscFunctionReturn(PETSC_SUCCESS); 2458 } 2459 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2460 PetscCallMPI(MPI_Comm_size(comm, &size)); 2461 2462 /* Calculate leaf array offsets */ 2463 PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets)); 2464 leafArrayOffsets[0] = 0; 2465 for (s = 0; s < nsfs; s++) { 2466 PetscInt nl; 2467 2468 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL)); 2469 leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl; 2470 } 2471 nLeaves = leafArrayOffsets[nsfs]; 2472 2473 /* Calculate number of roots */ 2474 switch (rootMode) { 2475 case PETSCSF_CONCATENATE_ROOTMODE_SHARED: { 2476 PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL)); 2477 if (PetscDefined(USE_DEBUG)) { 2478 for (s = 1; s < nsfs; s++) { 2479 PetscInt nr; 2480 2481 PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL)); 2482 PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots); 2483 } 2484 } 2485 } break; 2486 case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: { 2487 /* Calculate also global layout in this case */ 2488 PetscInt *nls; 2489 PetscLayout *lts; 2490 PetscInt **inds; 2491 PetscInt j; 2492 PetscInt rootOffset = 0; 2493 2494 PetscCall(PetscCalloc3(nsfs, <s, nsfs, &nls, nsfs, &inds)); 2495 PetscCall(PetscLayoutCreate(comm, &glayout)); 2496 glayout->bs = 1; 2497 glayout->n = 0; 2498 glayout->N = 0; 2499 for (s = 0; s < nsfs; s++) { 2500 PetscCall(PetscSFGetGraphLayout(sfs[s], <s[s], &nls[s], NULL, &inds[s])); 2501 glayout->n += lts[s]->n; 2502 glayout->N += lts[s]->N; 2503 } 2504 PetscCall(PetscLayoutSetUp(glayout)); 2505 PetscCall(PetscMalloc1(nLeaves, &gremote)); 2506 for (s = 0, j = 0; s < nsfs; s++) { 2507 for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset; 2508 rootOffset += lts[s]->N; 2509 PetscCall(PetscLayoutDestroy(<s[s])); 2510 PetscCall(PetscFree(inds[s])); 2511 } 2512 PetscCall(PetscFree3(lts, nls, inds)); 2513 nRoots = glayout->N; 2514 } break; 2515 case PETSCSF_CONCATENATE_ROOTMODE_LOCAL: 2516 /* nRoots calculated later in this case */ 2517 break; 2518 default: 2519 SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode); 2520 } 2521 2522 if (!leafOffsets) { 2523 all_ilocal_null = PETSC_TRUE; 2524 for (s = 0; s < nsfs; s++) { 2525 const PetscInt *ilocal; 2526 2527 PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL)); 2528 if (ilocal) { 2529 all_ilocal_null = PETSC_FALSE; 2530 break; 2531 } 2532 } 2533 PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL"); 2534 } 2535 2536 /* Renumber and concatenate local leaves */ 2537 ilocal_new = NULL; 2538 if (!all_ilocal_null) { 2539 PetscCall(PetscMalloc1(nLeaves, &ilocal_new)); 2540 for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1; 2541 for (s = 0; s < nsfs; s++) { 2542 const PetscInt *ilocal; 
2543 PetscInt *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]); 2544 PetscInt i, nleaves_l; 2545 2546 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL)); 2547 for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s]; 2548 } 2549 } 2550 2551 /* Renumber and concatenate remote roots */ 2552 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) { 2553 PetscInt rootOffset = 0; 2554 2555 PetscCall(PetscMalloc1(nLeaves, &iremote_new)); 2556 for (i = 0; i < nLeaves; i++) { 2557 iremote_new[i].rank = -1; 2558 iremote_new[i].index = -1; 2559 } 2560 for (s = 0; s < nsfs; s++) { 2561 PetscInt i, nl, nr; 2562 PetscSF tmp_sf; 2563 const PetscSFNode *iremote; 2564 PetscSFNode *tmp_rootdata; 2565 PetscSFNode *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]); 2566 2567 PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote)); 2568 PetscCall(PetscSFCreate(comm, &tmp_sf)); 2569 /* create helper SF with contiguous leaves */ 2570 PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 2571 PetscCall(PetscSFSetUp(tmp_sf)); 2572 PetscCall(PetscMalloc1(nr, &tmp_rootdata)); 2573 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) { 2574 for (i = 0; i < nr; i++) { 2575 tmp_rootdata[i].index = i + rootOffset; 2576 tmp_rootdata[i].rank = (PetscInt)rank; 2577 } 2578 rootOffset += nr; 2579 } else { 2580 for (i = 0; i < nr; i++) { 2581 tmp_rootdata[i].index = i; 2582 tmp_rootdata[i].rank = (PetscInt)rank; 2583 } 2584 } 2585 PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2586 PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2587 PetscCall(PetscSFDestroy(&tmp_sf)); 2588 PetscCall(PetscFree(tmp_rootdata)); 2589 } 2590 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above 2591 2592 /* Build the new SF */ 2593 PetscCall(PetscSFCreate(comm, newsf)); 2594 PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER)); 2595 } else { 2596 /* Build the new SF */ 2597 PetscCall(PetscSFCreate(comm, newsf)); 2598 PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote)); 2599 } 2600 PetscCall(PetscSFSetUp(*newsf)); 2601 PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view")); 2602 PetscCall(PetscLayoutDestroy(&glayout)); 2603 PetscCall(PetscFree(gremote)); 2604 PetscCall(PetscFree(leafArrayOffsets)); 2605 PetscFunctionReturn(PETSC_SUCCESS); 2606 } 2607