#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/hashseti.h>
#include <petsc/private/viewerimpl.h>
#include <petsc/private/hashmapi.h>

#if defined(PETSC_HAVE_CUDA)
  #include <cuda_runtime.h>
  #include <petscdevice_cuda.h>
#endif

#if defined(PETSC_HAVE_HIP)
  #include <hip/hip_runtime.h>
#endif

#if defined(PETSC_CLANG_STATIC_ANALYZER)
extern void PetscSFCheckGraphSet(PetscSF, int);
#else
  #if defined(PETSC_USE_DEBUG)
    #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME)
  #else
    #define PetscSFCheckGraphSet(sf, arg) \
      do { \
      } while (0)
  #endif
#endif

const char *const PetscSFDuplicateOptions[]     = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL};

/*@
  PetscSFCreate - create a star forest communication context

  Collective

  Input Parameter:
. comm - communicator on which the star forest will operate

  Output Parameter:
. sf - new star forest context

  Options Database Keys:
+ -sf_type basic                 - Use MPI persistent Isend/Irecv for communication (Default)
. -sf_type window                - Use MPI-3 one-sided window for communication
. -sf_type neighbor              - Use MPI-3 neighborhood collectives for communication
- -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor)

  Level: intermediate

  Note:
  When one knows the communication graph is one of the predefined graphs, such as `MPI_Alltoall()`, `MPI_Allgatherv()`,
  `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
  `SF`s are optimized and have better performance than general `SF`s.
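  Example Usage:
  A minimal sketch of the typical lifecycle; the one-root graph below is purely illustrative and not part of this routine's interface
.vb
  PetscSF     sf;
  PetscSFNode remote[1];

  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  remote[0].rank  = 0; // every process attaches one leaf to root 0 owned by rank 0
  remote[0].index = 0;
  PetscCall(PetscSFSetGraph(sf, 1, 1, NULL, PETSC_COPY_VALUES, remote, PETSC_COPY_VALUES));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));
  // ... communicate with PetscSFBcastBegin()/PetscSFBcastEnd(), PetscSFReduceBegin()/PetscSFReduceEnd(), ...
  PetscCall(PetscSFDestroy(&sf));
.ve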
53 54 .seealso: `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()` 55 @*/ 56 PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf) 57 { 58 PetscSF b; 59 60 PetscFunctionBegin; 61 PetscAssertPointer(sf, 2); 62 PetscCall(PetscSFInitializePackage()); 63 64 PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView)); 65 66 b->nroots = -1; 67 b->nleaves = -1; 68 b->minleaf = PETSC_MAX_INT; 69 b->maxleaf = PETSC_MIN_INT; 70 b->nranks = -1; 71 b->rankorder = PETSC_TRUE; 72 b->ingroup = MPI_GROUP_NULL; 73 b->outgroup = MPI_GROUP_NULL; 74 b->graphset = PETSC_FALSE; 75 #if defined(PETSC_HAVE_DEVICE) 76 b->use_gpu_aware_mpi = use_gpu_aware_mpi; 77 b->use_stream_aware_mpi = PETSC_FALSE; 78 b->unknown_input_stream = PETSC_FALSE; 79 #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 80 b->backend = PETSCSF_BACKEND_KOKKOS; 81 #elif defined(PETSC_HAVE_CUDA) 82 b->backend = PETSCSF_BACKEND_CUDA; 83 #elif defined(PETSC_HAVE_HIP) 84 b->backend = PETSCSF_BACKEND_HIP; 85 #endif 86 87 #if defined(PETSC_HAVE_NVSHMEM) 88 b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */ 89 b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */ 90 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL)); 91 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL)); 92 #endif 93 #endif 94 b->vscat.from_n = -1; 95 b->vscat.to_n = -1; 96 b->vscat.unit = MPIU_SCALAR; 97 *sf = b; 98 PetscFunctionReturn(PETSC_SUCCESS); 99 } 100 101 /*@ 102 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 103 104 Collective 105 106 Input Parameter: 107 . sf - star forest 108 109 Level: advanced 110 111 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()` 112 @*/ 113 PetscErrorCode PetscSFReset(PetscSF sf) 114 { 115 PetscFunctionBegin; 116 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 117 PetscTryTypeMethod(sf, Reset); 118 PetscCall(PetscSFDestroy(&sf->rankssf)); 119 120 sf->nroots = -1; 121 sf->nleaves = -1; 122 sf->minleaf = PETSC_MAX_INT; 123 sf->maxleaf = PETSC_MIN_INT; 124 sf->mine = NULL; 125 sf->remote = NULL; 126 sf->graphset = PETSC_FALSE; 127 PetscCall(PetscFree(sf->mine_alloc)); 128 PetscCall(PetscFree(sf->remote_alloc)); 129 sf->nranks = -1; 130 PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote)); 131 sf->degreeknown = PETSC_FALSE; 132 PetscCall(PetscFree(sf->degree)); 133 if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup)); 134 if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup)); 135 136 if (sf->multi) sf->multi->multi = NULL; 137 PetscCall(PetscSFDestroy(&sf->multi)); 138 139 PetscCall(PetscLayoutDestroy(&sf->map)); 140 141 #if defined(PETSC_HAVE_DEVICE) 142 for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i])); 143 #endif 144 145 sf->setupcalled = PETSC_FALSE; 146 PetscFunctionReturn(PETSC_SUCCESS); 147 } 148 149 /*@ 150 PetscSFSetType - Set the `PetscSF` communication implementation 151 152 Collective 153 154 Input Parameters: 155 + sf - the `PetscSF` context 156 - type - a known method 157 .vb 158 PETSCSFWINDOW - MPI-2/3 one-sided 159 PETSCSFBASIC - basic implementation using MPI-1 two-sided 160 .ve 161 162 Options Database Key: 163 . 
-sf_type <type> - Sets the method; for example `basic` or `window` use -help for a list of available methods 164 165 Level: intermediate 166 167 Notes: 168 See `PetscSFType` for possible values 169 170 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()` 171 @*/ 172 PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type) 173 { 174 PetscBool match; 175 PetscErrorCode (*r)(PetscSF); 176 177 PetscFunctionBegin; 178 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 179 PetscAssertPointer(type, 2); 180 181 PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match)); 182 if (match) PetscFunctionReturn(PETSC_SUCCESS); 183 184 PetscCall(PetscFunctionListFind(PetscSFList, type, &r)); 185 PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type); 186 /* Destroy the previous PetscSF implementation context */ 187 PetscTryTypeMethod(sf, Destroy); 188 PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops))); 189 PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type)); 190 PetscCall((*r)(sf)); 191 PetscFunctionReturn(PETSC_SUCCESS); 192 } 193 194 /*@ 195 PetscSFGetType - Get the `PetscSF` communication implementation 196 197 Not Collective 198 199 Input Parameter: 200 . sf - the `PetscSF` context 201 202 Output Parameter: 203 . type - the `PetscSF` type name 204 205 Level: intermediate 206 207 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()` 208 @*/ 209 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 210 { 211 PetscFunctionBegin; 212 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 213 PetscAssertPointer(type, 2); 214 *type = ((PetscObject)sf)->type_name; 215 PetscFunctionReturn(PETSC_SUCCESS); 216 } 217 218 /*@C 219 PetscSFDestroy - destroy a star forest 220 221 Collective 222 223 Input Parameter: 224 . sf - address of star forest 225 226 Level: intermediate 227 228 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()` 229 @*/ 230 PetscErrorCode PetscSFDestroy(PetscSF *sf) 231 { 232 PetscFunctionBegin; 233 if (!*sf) PetscFunctionReturn(PETSC_SUCCESS); 234 PetscValidHeaderSpecific(*sf, PETSCSF_CLASSID, 1); 235 if (--((PetscObject)*sf)->refct > 0) { 236 *sf = NULL; 237 PetscFunctionReturn(PETSC_SUCCESS); 238 } 239 PetscCall(PetscSFReset(*sf)); 240 PetscTryTypeMethod(*sf, Destroy); 241 PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf)); 242 if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit)); 243 #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM) 244 if ((*sf)->use_stream_aware_mpi) { 245 PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream)); 246 PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm)); 247 } 248 #endif 249 PetscCall(PetscHeaderDestroy(sf)); 250 PetscFunctionReturn(PETSC_SUCCESS); 251 } 252 253 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 254 { 255 PetscInt i, nleaves; 256 PetscMPIInt size; 257 const PetscInt *ilocal; 258 const PetscSFNode *iremote; 259 260 PetscFunctionBegin; 261 if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS); 262 PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote)); 263 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 264 for (i = 0; i < nleaves; i++) { 265 const PetscInt rank = iremote[i].rank; 266 const PetscInt remote = iremote[i].index; 267 const PetscInt leaf = ilocal ? 
ilocal[i] : i;
    PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
    PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
    PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication

  Collective

  Input Parameter:
. sf - star forest communication object

  Level: beginner

.seealso: `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
@*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
  PetscCall(PetscSFCheckGraphValid_Private(sf));
  if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
  PetscTryTypeMethod(sf, SetUp);
#if defined(PETSC_HAVE_CUDA)
  if (sf->backend == PETSCSF_BACKEND_CUDA) {
    sf->ops->Malloc = PetscSFMalloc_CUDA;
    sf->ops->Free   = PetscSFFree_CUDA;
  }
#endif
#if defined(PETSC_HAVE_HIP)
  if (sf->backend == PETSCSF_BACKEND_HIP) {
    sf->ops->Malloc = PetscSFMalloc_HIP;
    sf->ops->Free   = PetscSFFree_HIP;
  }
#endif

#if defined(PETSC_HAVE_KOKKOS)
  if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
    sf->ops->Malloc = PetscSFMalloc_Kokkos;
    sf->ops->Free   = PetscSFFree_Kokkos;
  }
#endif
  PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFSetFromOptions - set `PetscSF` options using the options database

  Logically Collective

  Input Parameter:
. sf - star forest

  Options Database Keys:
+ -sf_type                        - implementation type, see `PetscSFSetType()`
. -sf_rank_order                  - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
. -sf_use_default_stream          - Assume callers of `PetscSF` computed the input root/leafdata with the default CUDA stream. `PetscSF` will also
                                    use the default stream to process data. Therefore, no stream synchronization is needed between `PetscSF` and its caller (default: true).
                                    If true, this option only works with `-use_gpu_aware_mpi 1`.
. -sf_use_stream_aware_mpi        - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false).
                                    If true, this option only works with `-use_gpu_aware_mpi 1`.
- -sf_backend cuda | hip | kokkos - Select the device backend SF uses. Currently `PetscSF` has three backends: cuda, hip and kokkos.
                                    On CUDA (HIP) devices, one can choose cuda (hip) or kokkos, with the default being kokkos. On other devices,
                                    the only available backend is kokkos.
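  Example Usage:
  A sketch of a typical call sequence; the graph arguments (`nroots`, `nleaves`, `ilocal`, `iremote`) are assumed to have been
  built by the caller. Call this routine before `PetscSFSetUp()` so that options such as `-sf_type` and `-sf_backend` take effect
.vb
  PetscCall(PetscSFCreate(comm, &sf));
  PetscCall(PetscSFSetGraph(sf, nroots, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf)); // picks up e.g. -sf_type neighbor -sf_backend kokkos
  PetscCall(PetscSFSetUp(sf));
.ve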
341 342 Level: intermediate 343 344 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()` 345 @*/ 346 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 347 { 348 PetscSFType deft; 349 char type[256]; 350 PetscBool flg; 351 352 PetscFunctionBegin; 353 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 354 PetscObjectOptionsBegin((PetscObject)sf); 355 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 356 PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg)); 357 PetscCall(PetscSFSetType(sf, flg ? type : deft)); 358 PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL)); 359 #if defined(PETSC_HAVE_DEVICE) 360 { 361 char backendstr[32] = {0}; 362 PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set; 363 /* Change the defaults set in PetscSFCreate() with command line options */ 364 PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL)); 365 PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL)); 366 PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set)); 367 PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda)); 368 PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos)); 369 PetscCall(PetscStrcasecmp("hip", backendstr, &isHip)); 370 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 371 if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 372 else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 373 else if (isHip) sf->backend = PETSCSF_BACKEND_HIP; 374 else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr); 375 #elif defined(PETSC_HAVE_KOKKOS) 376 PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. 
You can only choose kokkos", backendstr);
#endif

#if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
    if (sf->use_stream_aware_mpi) {
      MPI_Info info;

      PetscCallMPI(MPI_Info_create(&info));
      PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t"));
      PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream)));
      PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream));
      PetscCallMPI(MPI_Info_free(&info));
      PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm));
    }
#endif
  }
#endif
  PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

  Logically Collective

  Input Parameters:
+ sf  - star forest
- flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic)

  Level: advanced

.seealso: `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveBool(sf, flg, 2);
  PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFSetGraph - Set a parallel star forest

  Collective

  Input Parameters:
+ sf         - star forest
. nroots     - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
. nleaves    - number of leaf vertices on the current process, each of these references a root on any process
. ilocal     - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage (locations must be >= 0, enforced
               during setup in debug mode)
. localmode  - copy mode for `ilocal`
. iremote    - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
               during setup in debug mode)
- remotemode - copy mode for `iremote`

  Level: intermediate

  Notes:
  Leaf indices in `ilocal` must be unique, otherwise an error occurs.

  Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics.
  In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
  PETSc might modify the respective array;
  if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`.
  Only if `PETSC_COPY_VALUES` is used is the respective array guaranteed to stay intact; only then can a const array be passed (though a cast to non-const is still needed).

  Fortran Notes:
  In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`.

  Developer Notes:
  We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
  This also allows one to compare the leaf sets of two `PetscSF`s easily.
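  Example Usage:
  A sketch in which each process attaches two leaves to roots owned by the next process; the ring pattern is illustrative and
  `rank`/`size` are assumed to be the communicator rank and size obtained by the caller
.vb
  PetscInt    ilocal[2] = {0, 1}; // where received values land in the local leafdata buffer
  PetscSFNode iremote[2];

  iremote[0].rank  = (rank + 1) % size; // process owning the referenced root
  iremote[0].index = 0;                 // root location on that process
  iremote[1].rank  = (rank + 1) % size;
  iremote[1].index = 1;
  PetscCall(PetscSFSetGraph(sf, 2, 2, ilocal, PETSC_COPY_VALUES, iremote, PETSC_COPY_VALUES));
.ve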
454 455 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()` 456 @*/ 457 PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode) 458 { 459 PetscBool unique, contiguous; 460 461 PetscFunctionBegin; 462 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 463 if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4); 464 if (nleaves > 0) PetscAssertPointer(iremote, 6); 465 PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots); 466 PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves); 467 /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast 468 * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */ 469 PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode); 470 PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode); 471 472 if (sf->nroots >= 0) { /* Reset only if graph already set */ 473 PetscCall(PetscSFReset(sf)); 474 } 475 476 PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0)); 477 478 sf->nroots = nroots; 479 sf->nleaves = nleaves; 480 481 if (localmode == PETSC_COPY_VALUES && ilocal) { 482 PetscInt *tlocal = NULL; 483 484 PetscCall(PetscMalloc1(nleaves, &tlocal)); 485 PetscCall(PetscArraycpy(tlocal, ilocal, nleaves)); 486 ilocal = tlocal; 487 } 488 if (remotemode == PETSC_COPY_VALUES) { 489 PetscSFNode *tremote = NULL; 490 491 PetscCall(PetscMalloc1(nleaves, &tremote)); 492 PetscCall(PetscArraycpy(tremote, iremote, nleaves)); 493 iremote = tremote; 494 } 495 496 if (nleaves && ilocal) { 497 PetscSFNode work; 498 499 PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work)); 500 PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique)); 501 unique = PetscNot(unique); 502 PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF"); 503 sf->minleaf = ilocal[0]; 504 sf->maxleaf = ilocal[nleaves - 1]; 505 contiguous = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1); 506 } else { 507 sf->minleaf = 0; 508 sf->maxleaf = nleaves - 1; 509 unique = PETSC_TRUE; 510 contiguous = PETSC_TRUE; 511 } 512 513 if (contiguous) { 514 if (localmode == PETSC_USE_POINTER) { 515 ilocal = NULL; 516 } else { 517 PetscCall(PetscFree(ilocal)); 518 } 519 } 520 sf->mine = ilocal; 521 if (localmode == PETSC_USE_POINTER) { 522 sf->mine_alloc = NULL; 523 } else { 524 sf->mine_alloc = ilocal; 525 } 526 sf->remote = iremote; 527 if (remotemode == PETSC_USE_POINTER) { 528 sf->remote_alloc = NULL; 529 } else { 530 sf->remote_alloc = iremote; 531 } 532 PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0)); 533 sf->graphset = PETSC_TRUE; 534 PetscFunctionReturn(PETSC_SUCCESS); 535 } 536 537 /*@ 538 PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern 539 540 Collective 541 542 Input Parameters: 543 + sf - The `PetscSF` 544 . 
map - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`)
- pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`

  Level: intermediate

  Notes:
  It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `x` and its `PetscLayout` is `map`.
  `n` and `N` are the local and global sizes of `x` respectively.

  With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph such that, if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, `x` is copied to
  sequential vectors `y` on all MPI processes.

  With `PETSCSF_PATTERN_GATHER`, the routine creates a graph such that, if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, `x` is copied to a
  sequential vector `y` on rank 0.

  In the above cases, entries of `x` are roots and entries of `y` are leaves.

  With `PETSCSF_PATTERN_ALLTOALL`, `map` is insignificant. Suppose NP is the size of `sf`'s communicator. The routine
  creates a graph in which every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
  of rank j. Here 0 <= i,j < NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that this does
  not mean one cannot send multiple items. One just needs to create a new MPI datatype for the multiple data
  items with `MPI_Type_contiguous()` and use that as the <unit> argument in SF routines.

  In this case, roots and leaves are symmetric.

.seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
{
  MPI_Comm    comm;
  PetscInt    n, N, res[2];
  PetscMPIInt rank, size;
  PetscSFType type;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2);
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    PetscCall(PetscLayoutCreate(comm, &sf->map));
    PetscCall(PetscLayoutSetLocalSize(sf->map, size));
    PetscCall(PetscLayoutSetSize(sf->map, ((PetscInt)size) * size));
    PetscCall(PetscLayoutSetUp(sf->map));
  } else {
    PetscCall(PetscLayoutGetLocalSize(map, &n));
    PetscCall(PetscLayoutGetSize(map, &N));
    res[0] = n;
    res[1] = -n;
    /* Check if n is the same over all ranks so that we can optimize it */
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    PetscCall(PetscLayoutReference(map, &sf->map));
  }
  PetscCall(PetscSFSetType(sf, type));

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
613 */ 614 if (pattern == PETSCSF_PATTERN_ALLGATHER) { 615 sf->nleaves = N; 616 sf->nroots = n; 617 sf->nranks = size; 618 sf->minleaf = 0; 619 sf->maxleaf = N - 1; 620 } else if (pattern == PETSCSF_PATTERN_GATHER) { 621 sf->nleaves = rank ? 0 : N; 622 sf->nroots = n; 623 sf->nranks = rank ? 0 : size; 624 sf->minleaf = 0; 625 sf->maxleaf = rank ? -1 : N - 1; 626 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 627 sf->nleaves = size; 628 sf->nroots = size; 629 sf->nranks = size; 630 sf->minleaf = 0; 631 sf->maxleaf = size - 1; 632 } 633 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 634 sf->graphset = PETSC_TRUE; 635 PetscFunctionReturn(PETSC_SUCCESS); 636 } 637 638 /*@ 639 PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map 640 641 Collective 642 643 Input Parameter: 644 . sf - star forest to invert 645 646 Output Parameter: 647 . isf - inverse of `sf` 648 649 Level: advanced 650 651 Notes: 652 All roots must have degree 1. 653 654 The local space may be a permutation, but cannot be sparse. 655 656 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetGraph()` 657 @*/ 658 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf) 659 { 660 PetscMPIInt rank; 661 PetscInt i, nroots, nleaves, maxlocal, count, *newilocal; 662 const PetscInt *ilocal; 663 PetscSFNode *roots, *leaves; 664 665 PetscFunctionBegin; 666 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 667 PetscSFCheckGraphSet(sf, 1); 668 PetscAssertPointer(isf, 2); 669 670 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL)); 671 maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */ 672 673 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 674 PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves)); 675 for (i = 0; i < maxlocal; i++) { 676 leaves[i].rank = rank; 677 leaves[i].index = i; 678 } 679 for (i = 0; i < nroots; i++) { 680 roots[i].rank = -1; 681 roots[i].index = -1; 682 } 683 PetscCall(PetscSFReduceBegin(sf, MPIU_2INT, leaves, roots, MPI_REPLACE)); 684 PetscCall(PetscSFReduceEnd(sf, MPIU_2INT, leaves, roots, MPI_REPLACE)); 685 686 /* Check whether our leaves are sparse */ 687 for (i = 0, count = 0; i < nroots; i++) 688 if (roots[i].rank >= 0) count++; 689 if (count == nroots) newilocal = NULL; 690 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal)); 691 for (i = 0, count = 0; i < nroots; i++) { 692 if (roots[i].rank >= 0) { 693 newilocal[count] = i; 694 roots[count].rank = roots[i].rank; 695 roots[count].index = roots[i].index; 696 count++; 697 } 698 } 699 } 700 701 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf)); 702 PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES)); 703 PetscCall(PetscFree2(roots, leaves)); 704 PetscFunctionReturn(PETSC_SUCCESS); 705 } 706 707 /*@ 708 PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph 709 710 Collective 711 712 Input Parameters: 713 + sf - communication object to duplicate 714 - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`) 715 716 Output Parameter: 717 . 
newsf - new communication object 718 719 Level: beginner 720 721 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()` 722 @*/ 723 PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf) 724 { 725 PetscSFType type; 726 MPI_Datatype dtype = MPIU_SCALAR; 727 728 PetscFunctionBegin; 729 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 730 PetscValidLogicalCollectiveEnum(sf, opt, 2); 731 PetscAssertPointer(newsf, 3); 732 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf)); 733 PetscCall(PetscSFGetType(sf, &type)); 734 if (type) PetscCall(PetscSFSetType(*newsf, type)); 735 (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */ 736 if (opt == PETSCSF_DUPLICATE_GRAPH) { 737 PetscSFCheckGraphSet(sf, 1); 738 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 739 PetscInt nroots, nleaves; 740 const PetscInt *ilocal; 741 const PetscSFNode *iremote; 742 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 743 PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 744 } else { 745 PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern)); 746 } 747 } 748 /* Since oldtype is committed, so is newtype, according to MPI */ 749 if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype)); 750 (*newsf)->vscat.bs = sf->vscat.bs; 751 (*newsf)->vscat.unit = dtype; 752 (*newsf)->vscat.to_n = sf->vscat.to_n; 753 (*newsf)->vscat.from_n = sf->vscat.from_n; 754 /* Do not copy lsf. Build it on demand since it is rarely used */ 755 756 #if defined(PETSC_HAVE_DEVICE) 757 (*newsf)->backend = sf->backend; 758 (*newsf)->unknown_input_stream = sf->unknown_input_stream; 759 (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi; 760 (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi; 761 #endif 762 PetscTryTypeMethod(sf, Duplicate, opt, *newsf); 763 /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */ 764 PetscFunctionReturn(PETSC_SUCCESS); 765 } 766 767 /*@C 768 PetscSFGetGraph - Get the graph specifying a parallel star forest 769 770 Not Collective 771 772 Input Parameter: 773 . sf - star forest 774 775 Output Parameters: 776 + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 777 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 778 . ilocal - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage) 779 - iremote - remote locations of root vertices for each leaf on the current process 780 781 Level: intermediate 782 783 Notes: 784 We are not currently requiring that the graph is set, thus returning `nroots` = -1 if it has not been set yet 785 786 The returned `ilocal` and `iremote` might contain values in different order than the input ones in `PetscSFSetGraph()` 787 788 Fortran Notes: 789 The returned `iremote` array is a copy and must be deallocated after use. Consequently, if you 790 want to update the graph, you must call `PetscSFSetGraph()` after modifying the `iremote` array. 
791 792 To check for a `NULL` `ilocal` use 793 $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then 794 795 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()` 796 @*/ 797 PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote) 798 { 799 PetscFunctionBegin; 800 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 801 if (sf->ops->GetGraph) { 802 PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote)); 803 } else { 804 if (nroots) *nroots = sf->nroots; 805 if (nleaves) *nleaves = sf->nleaves; 806 if (ilocal) *ilocal = sf->mine; 807 if (iremote) *iremote = sf->remote; 808 } 809 PetscFunctionReturn(PETSC_SUCCESS); 810 } 811 812 /*@ 813 PetscSFGetLeafRange - Get the active leaf ranges 814 815 Not Collective 816 817 Input Parameter: 818 . sf - star forest 819 820 Output Parameters: 821 + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves. 822 - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves. 823 824 Level: developer 825 826 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 827 @*/ 828 PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf) 829 { 830 PetscFunctionBegin; 831 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 832 PetscSFCheckGraphSet(sf, 1); 833 if (minleaf) *minleaf = sf->minleaf; 834 if (maxleaf) *maxleaf = sf->maxleaf; 835 PetscFunctionReturn(PETSC_SUCCESS); 836 } 837 838 /*@ 839 PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database 840 841 Collective 842 843 Input Parameters: 844 + A - the star forest 845 . obj - Optional object that provides the prefix for the option names 846 - name - command line option 847 848 Level: intermediate 849 850 Note: 851 See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat` 852 853 .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()` 854 @*/ 855 PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[]) 856 { 857 PetscFunctionBegin; 858 PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1); 859 PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name)); 860 PetscFunctionReturn(PETSC_SUCCESS); 861 } 862 863 /*@ 864 PetscSFView - view a star forest 865 866 Collective 867 868 Input Parameters: 869 + sf - star forest 870 - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD` 871 872 Level: beginner 873 874 .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()` 875 @*/ 876 PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer) 877 { 878 PetscBool iascii; 879 PetscViewerFormat format; 880 881 PetscFunctionBegin; 882 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 883 if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer)); 884 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 885 PetscCheckSameComm(sf, 1, viewer, 2); 886 if (sf->graphset) PetscCall(PetscSFSetUp(sf)); 887 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 888 if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) { 889 PetscMPIInt rank; 890 PetscInt ii, i, j; 891 892 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer)); 893 PetscCall(PetscViewerASCIIPushTab(viewer)); 894 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 895 if (!sf->graphset) { 896 
PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n")); 897 PetscCall(PetscViewerASCIIPopTab(viewer)); 898 PetscFunctionReturn(PETSC_SUCCESS); 899 } 900 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 901 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 902 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n", rank, sf->nroots, sf->nleaves, sf->nranks)); 903 for (i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index)); 904 PetscCall(PetscViewerFlush(viewer)); 905 PetscCall(PetscViewerGetFormat(viewer, &format)); 906 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 907 PetscMPIInt *tmpranks, *perm; 908 PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm)); 909 PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks)); 910 for (i = 0; i < sf->nranks; i++) perm[i] = i; 911 PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm)); 912 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank)); 913 for (ii = 0; ii < sf->nranks; ii++) { 914 i = perm[ii]; 915 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i])); 916 for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j])); 917 } 918 PetscCall(PetscFree2(tmpranks, perm)); 919 } 920 PetscCall(PetscViewerFlush(viewer)); 921 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 922 } 923 PetscCall(PetscViewerASCIIPopTab(viewer)); 924 } 925 PetscTryTypeMethod(sf, View, viewer); 926 PetscFunctionReturn(PETSC_SUCCESS); 927 } 928 929 /*@C 930 PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process 931 932 Not Collective 933 934 Input Parameter: 935 . sf - star forest 936 937 Output Parameters: 938 + nranks - number of ranks referenced by local part 939 . ranks - [`nranks`] array of ranks 940 . roffset - [`nranks`+1] offset in `rmine`/`rremote` for each rank 941 . 
rmine - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote rank 942 - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote rank 943 944 Level: developer 945 946 .seealso: `PetscSF`, `PetscSFGetLeafRanks()` 947 @*/ 948 PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote) 949 { 950 PetscFunctionBegin; 951 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 952 PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 953 if (sf->ops->GetRootRanks) { 954 PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote); 955 } else { 956 /* The generic implementation */ 957 if (nranks) *nranks = sf->nranks; 958 if (ranks) *ranks = sf->ranks; 959 if (roffset) *roffset = sf->roffset; 960 if (rmine) *rmine = sf->rmine; 961 if (rremote) *rremote = sf->rremote; 962 } 963 PetscFunctionReturn(PETSC_SUCCESS); 964 } 965 966 /*@C 967 PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 968 969 Not Collective 970 971 Input Parameter: 972 . sf - star forest 973 974 Output Parameters: 975 + niranks - number of leaf ranks referencing roots on this process 976 . iranks - [`niranks`] array of ranks 977 . ioffset - [`niranks`+1] offset in `irootloc` for each rank 978 - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf rank 979 980 Level: developer 981 982 .seealso: `PetscSF`, `PetscSFGetRootRanks()` 983 @*/ 984 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc) 985 { 986 PetscFunctionBegin; 987 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 988 PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 989 if (sf->ops->GetLeafRanks) { 990 PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc); 991 } else { 992 PetscSFType type; 993 PetscCall(PetscSFGetType(sf, &type)); 994 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 995 } 996 PetscFunctionReturn(PETSC_SUCCESS); 997 } 998 999 static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list) 1000 { 1001 PetscInt i; 1002 for (i = 0; i < n; i++) { 1003 if (needle == list[i]) return PETSC_TRUE; 1004 } 1005 return PETSC_FALSE; 1006 } 1007 1008 /*@C 1009 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations. 
1010 1011 Collective 1012 1013 Input Parameters: 1014 + sf - `PetscSF` to set up; `PetscSFSetGraph()` must have been called 1015 - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange) 1016 1017 Level: developer 1018 1019 .seealso: `PetscSF`, `PetscSFGetRootRanks()` 1020 @*/ 1021 PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup) 1022 { 1023 PetscHMapI table; 1024 PetscHashIter pos; 1025 PetscMPIInt size, groupsize, *groupranks; 1026 PetscInt *rcount, *ranks; 1027 PetscInt i, irank = -1, orank = -1; 1028 1029 PetscFunctionBegin; 1030 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1031 PetscSFCheckGraphSet(sf, 1); 1032 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 1033 PetscCall(PetscHMapICreateWithSize(10, &table)); 1034 for (i = 0; i < sf->nleaves; i++) { 1035 /* Log 1-based rank */ 1036 PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES)); 1037 } 1038 PetscCall(PetscHMapIGetSize(table, &sf->nranks)); 1039 PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote)); 1040 PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks)); 1041 PetscHashIterBegin(table, pos); 1042 for (i = 0; i < sf->nranks; i++) { 1043 PetscHashIterGetKey(table, pos, ranks[i]); 1044 PetscHashIterGetVal(table, pos, rcount[i]); 1045 PetscHashIterNext(table, pos); 1046 ranks[i]--; /* Convert back to 0-based */ 1047 } 1048 PetscCall(PetscHMapIDestroy(&table)); 1049 1050 /* We expect that dgroup is reliably "small" while nranks could be large */ 1051 { 1052 MPI_Group group = MPI_GROUP_NULL; 1053 PetscMPIInt *dgroupranks; 1054 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 1055 PetscCallMPI(MPI_Group_size(dgroup, &groupsize)); 1056 PetscCall(PetscMalloc1(groupsize, &dgroupranks)); 1057 PetscCall(PetscMalloc1(groupsize, &groupranks)); 1058 for (i = 0; i < groupsize; i++) dgroupranks[i] = i; 1059 if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks)); 1060 PetscCallMPI(MPI_Group_free(&group)); 1061 PetscCall(PetscFree(dgroupranks)); 1062 } 1063 1064 /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */ 1065 for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) { 1066 for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */ 1067 if (InList(ranks[i], groupsize, groupranks)) break; 1068 } 1069 for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */ 1070 if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break; 1071 } 1072 if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */ 1073 PetscInt tmprank, tmpcount; 1074 1075 tmprank = ranks[i]; 1076 tmpcount = rcount[i]; 1077 ranks[i] = ranks[sf->ndranks]; 1078 rcount[i] = rcount[sf->ndranks]; 1079 ranks[sf->ndranks] = tmprank; 1080 rcount[sf->ndranks] = tmpcount; 1081 sf->ndranks++; 1082 } 1083 } 1084 PetscCall(PetscFree(groupranks)); 1085 PetscCall(PetscSortIntWithArray(sf->ndranks, ranks, rcount)); 1086 if (rcount) PetscCall(PetscSortIntWithArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks)); 1087 sf->roffset[0] = 0; 1088 for (i = 0; i < sf->nranks; i++) { 1089 PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i)); 1090 sf->roffset[i + 1] = sf->roffset[i] + rcount[i]; 1091 rcount[i] = 0; 1092 } 1093 for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) { 1094 /* 
short circuit */ 1095 if (orank != sf->remote[i].rank) { 1096 /* Search for index of iremote[i].rank in sf->ranks */ 1097 PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->ndranks, sf->ranks, &irank)); 1098 if (irank < 0) { 1099 PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank)); 1100 if (irank >= 0) irank += sf->ndranks; 1101 } 1102 orank = sf->remote[i].rank; 1103 } 1104 PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %" PetscInt_FMT " in array", sf->remote[i].rank); 1105 sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i; 1106 sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index; 1107 rcount[irank]++; 1108 } 1109 PetscCall(PetscFree2(rcount, ranks)); 1110 PetscFunctionReturn(PETSC_SUCCESS); 1111 } 1112 1113 /*@C 1114 PetscSFGetGroups - gets incoming and outgoing process groups 1115 1116 Collective 1117 1118 Input Parameter: 1119 . sf - star forest 1120 1121 Output Parameters: 1122 + incoming - group of origin processes for incoming edges (leaves that reference my roots) 1123 - outgoing - group of destination processes for outgoing edges (roots that I reference) 1124 1125 Level: developer 1126 1127 .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()` 1128 @*/ 1129 PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing) 1130 { 1131 MPI_Group group = MPI_GROUP_NULL; 1132 1133 PetscFunctionBegin; 1134 PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups"); 1135 if (sf->ingroup == MPI_GROUP_NULL) { 1136 PetscInt i; 1137 const PetscInt *indegree; 1138 PetscMPIInt rank, *outranks, *inranks; 1139 PetscSFNode *remote; 1140 PetscSF bgcount; 1141 1142 /* Compute the number of incoming ranks */ 1143 PetscCall(PetscMalloc1(sf->nranks, &remote)); 1144 for (i = 0; i < sf->nranks; i++) { 1145 remote[i].rank = sf->ranks[i]; 1146 remote[i].index = 0; 1147 } 1148 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount)); 1149 PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER)); 1150 PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree)); 1151 PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree)); 1152 /* Enumerate the incoming ranks */ 1153 PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks)); 1154 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 1155 for (i = 0; i < sf->nranks; i++) outranks[i] = rank; 1156 PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks)); 1157 PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks)); 1158 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 1159 PetscCallMPI(MPI_Group_incl(group, indegree[0], inranks, &sf->ingroup)); 1160 PetscCallMPI(MPI_Group_free(&group)); 1161 PetscCall(PetscFree2(inranks, outranks)); 1162 PetscCall(PetscSFDestroy(&bgcount)); 1163 } 1164 *incoming = sf->ingroup; 1165 1166 if (sf->outgroup == MPI_GROUP_NULL) { 1167 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 1168 PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup)); 1169 PetscCallMPI(MPI_Group_free(&group)); 1170 } 1171 *outgoing = sf->outgroup; 1172 PetscFunctionReturn(PETSC_SUCCESS); 1173 } 1174 1175 /*@ 1176 PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks 1177 1178 Collective 1179 1180 Input Parameter: 1181 . 
sf - star forest 1182 1183 Output Parameter: 1184 . rsf - the star forest with a single root per process to perform communications 1185 1186 Level: developer 1187 1188 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()` 1189 @*/ 1190 PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf) 1191 { 1192 PetscFunctionBegin; 1193 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1194 PetscAssertPointer(rsf, 2); 1195 if (!sf->rankssf) { 1196 PetscSFNode *rremotes; 1197 const PetscMPIInt *ranks; 1198 PetscInt nranks; 1199 1200 PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL)); 1201 PetscCall(PetscMalloc1(nranks, &rremotes)); 1202 for (PetscInt i = 0; i < nranks; i++) { 1203 rremotes[i].rank = ranks[i]; 1204 rremotes[i].index = 0; 1205 } 1206 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf)); 1207 PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER)); 1208 } 1209 *rsf = sf->rankssf; 1210 PetscFunctionReturn(PETSC_SUCCESS); 1211 } 1212 1213 /*@ 1214 PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters 1215 1216 Collective 1217 1218 Input Parameter: 1219 . sf - star forest that may contain roots with 0 or with more than 1 vertex 1220 1221 Output Parameter: 1222 . multi - star forest with split roots, such that each root has degree exactly 1 1223 1224 Level: developer 1225 1226 Note: 1227 In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi 1228 directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming 1229 edge, it is a candidate for future optimization that might involve its removal. 1230 1231 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()` 1232 @*/ 1233 PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi) 1234 { 1235 PetscFunctionBegin; 1236 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1237 PetscAssertPointer(multi, 2); 1238 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? 
*/ 1239 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi)); 1240 *multi = sf->multi; 1241 sf->multi->multi = sf->multi; 1242 PetscFunctionReturn(PETSC_SUCCESS); 1243 } 1244 if (!sf->multi) { 1245 const PetscInt *indegree; 1246 PetscInt i, *inoffset, *outones, *outoffset, maxlocal; 1247 PetscSFNode *remote; 1248 maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */ 1249 PetscCall(PetscSFComputeDegreeBegin(sf, &indegree)); 1250 PetscCall(PetscSFComputeDegreeEnd(sf, &indegree)); 1251 PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset)); 1252 inoffset[0] = 0; 1253 for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i]; 1254 for (i = 0; i < maxlocal; i++) outones[i] = 1; 1255 PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM)); 1256 PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM)); 1257 for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1258 if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */ 1259 for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp"); 1260 } 1261 PetscCall(PetscMalloc1(sf->nleaves, &remote)); 1262 for (i = 0; i < sf->nleaves; i++) { 1263 remote[i].rank = sf->remote[i].rank; 1264 remote[i].index = outoffset[sf->mine ? sf->mine[i] : i]; 1265 } 1266 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi)); 1267 sf->multi->multi = sf->multi; 1268 PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER)); 1269 if (sf->rankorder) { /* Sort the ranks */ 1270 PetscMPIInt rank; 1271 PetscInt *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree; 1272 PetscSFNode *newremote; 1273 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 1274 for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]); 1275 PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset)); 1276 for (i = 0; i < maxlocal; i++) outranks[i] = rank; 1277 PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE)); 1278 PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE)); 1279 /* Sort the incoming ranks at each vertex, build the inverse map */ 1280 for (i = 0; i < sf->nroots; i++) { 1281 PetscInt j; 1282 for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j; 1283 PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset)); 1284 for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1285 } 1286 PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE)); 1287 PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE)); 1288 PetscCall(PetscMalloc1(sf->nleaves, &newremote)); 1289 for (i = 0; i < sf->nleaves; i++) { 1290 newremote[i].rank = sf->remote[i].rank; 1291 newremote[i].index = newoutoffset[sf->mine ? 
sf->mine[i] : i]; 1292 } 1293 PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER)); 1294 PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset)); 1295 } 1296 PetscCall(PetscFree3(inoffset, outones, outoffset)); 1297 } 1298 *multi = sf->multi; 1299 PetscFunctionReturn(PETSC_SUCCESS); 1300 } 1301 1302 /*@C 1303 PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices 1304 1305 Collective 1306 1307 Input Parameters: 1308 + sf - original star forest 1309 . nselected - number of selected roots on this process 1310 - selected - indices of the selected roots on this process 1311 1312 Output Parameter: 1313 . esf - new star forest 1314 1315 Level: advanced 1316 1317 Note: 1318 To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can 1319 be done by calling PetscSFGetGraph(). 1320 1321 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1322 @*/ 1323 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf) 1324 { 1325 PetscInt i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal; 1326 const PetscInt *ilocal; 1327 signed char *rootdata, *leafdata, *leafmem; 1328 const PetscSFNode *iremote; 1329 PetscSFNode *new_iremote; 1330 MPI_Comm comm; 1331 1332 PetscFunctionBegin; 1333 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1334 PetscSFCheckGraphSet(sf, 1); 1335 if (nselected) PetscAssertPointer(selected, 3); 1336 PetscAssertPointer(esf, 4); 1337 1338 PetscCall(PetscSFSetUp(sf)); 1339 PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0)); 1340 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1341 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 1342 1343 if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */ 1344 PetscBool dups; 1345 PetscCall(PetscCheckDupsInt(nselected, selected, &dups)); 1346 PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups"); 1347 for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots); 1348 } 1349 1350 if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf); 1351 else { 1352 /* A generic version of creating embedded sf */ 1353 PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf)); 1354 maxlocal = maxleaf - minleaf + 1; 1355 PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem)); 1356 leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf); 1357 /* Tag selected roots and bcast to leaves */ 1358 for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1; 1359 PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1360 PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1361 1362 /* Build esf with leaves that are still connected */ 1363 esf_nleaves = 0; 1364 for (i = 0; i < nleaves; i++) { 1365 j = ilocal ? ilocal[i] : i; 1366 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1367 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1368 */ 1369 esf_nleaves += (leafdata[j] ? 
1 : 0); 1370 } 1371 PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal)); 1372 PetscCall(PetscMalloc1(esf_nleaves, &new_iremote)); 1373 for (i = n = 0; i < nleaves; i++) { 1374 j = ilocal ? ilocal[i] : i; 1375 if (leafdata[j]) { 1376 new_ilocal[n] = j; 1377 new_iremote[n].rank = iremote[i].rank; 1378 new_iremote[n].index = iremote[i].index; 1379 ++n; 1380 } 1381 } 1382 PetscCall(PetscSFCreate(comm, esf)); 1383 PetscCall(PetscSFSetFromOptions(*esf)); 1384 PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1385 PetscCall(PetscFree2(rootdata, leafmem)); 1386 } 1387 PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0)); 1388 PetscFunctionReturn(PETSC_SUCCESS); 1389 } 1390 1391 /*@C 1392 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices 1393 1394 Collective 1395 1396 Input Parameters: 1397 + sf - original star forest 1398 . nselected - number of selected leaves on this process 1399 - selected - indices of the selected leaves on this process 1400 1401 Output Parameter: 1402 . newsf - new star forest 1403 1404 Level: advanced 1405 1406 .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1407 @*/ 1408 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf) 1409 { 1410 const PetscSFNode *iremote; 1411 PetscSFNode *new_iremote; 1412 const PetscInt *ilocal; 1413 PetscInt i, nroots, *leaves, *new_ilocal; 1414 MPI_Comm comm; 1415 1416 PetscFunctionBegin; 1417 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1418 PetscSFCheckGraphSet(sf, 1); 1419 if (nselected) PetscAssertPointer(selected, 3); 1420 PetscAssertPointer(newsf, 4); 1421 1422 /* Uniq selected[] and put results in leaves[] */ 1423 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1424 PetscCall(PetscMalloc1(nselected, &leaves)); 1425 PetscCall(PetscArraycpy(leaves, selected, nselected)); 1426 PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves)); 1427 PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves); 1428 1429 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1430 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf); 1431 else { 1432 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote)); 1433 PetscCall(PetscMalloc1(nselected, &new_ilocal)); 1434 PetscCall(PetscMalloc1(nselected, &new_iremote)); 1435 for (i = 0; i < nselected; ++i) { 1436 const PetscInt l = leaves[i]; 1437 new_ilocal[i] = ilocal ? ilocal[l] : l; 1438 new_iremote[i].rank = iremote[l].rank; 1439 new_iremote[i].index = iremote[l].index; 1440 } 1441 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf)); 1442 PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1443 } 1444 PetscCall(PetscFree(leaves)); 1445 PetscFunctionReturn(PETSC_SUCCESS); 1446 } 1447 1448 /*@C 1449 PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()` 1450 1451 Collective 1452 1453 Input Parameters: 1454 + sf - star forest on which to communicate 1455 . 
unit - data type associated with each node 1456 . rootdata - buffer to broadcast 1457 - op - operation to use for reduction 1458 1459 Output Parameter: 1460 . leafdata - buffer to be reduced with values from each leaf's respective root 1461 1462 Level: intermediate 1463 1464 Note: 1465 When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1466 are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should 1467 use `PetscSFBcastWithMemTypeBegin()` instead. 1468 1469 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()` 1470 @*/ 1471 PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1472 { 1473 PetscMemType rootmtype, leafmtype; 1474 1475 PetscFunctionBegin; 1476 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1477 PetscCall(PetscSFSetUp(sf)); 1478 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1479 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1480 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1481 PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 1482 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1483 PetscFunctionReturn(PETSC_SUCCESS); 1484 } 1485 1486 /*@C 1487 PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call 1488 to `PetscSFBcastEnd()` 1489 1490 Collective 1491 1492 Input Parameters: 1493 + sf - star forest on which to communicate 1494 . unit - data type associated with each node 1495 . rootmtype - memory type of rootdata 1496 . rootdata - buffer to broadcast 1497 . leafmtype - memory type of leafdata 1498 - op - operation to use for reduction 1499 1500 Output Parameter: 1501 . leafdata - buffer to be reduced with values from each leaf's respective root 1502 1503 Level: intermediate 1504 1505 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()` 1506 @*/ 1507 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) 1508 { 1509 PetscFunctionBegin; 1510 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1511 PetscCall(PetscSFSetUp(sf)); 1512 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1513 PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 1514 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1515 PetscFunctionReturn(PETSC_SUCCESS); 1516 } 1517 1518 /*@C 1519 PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()` 1520 1521 Collective 1522 1523 Input Parameters: 1524 + sf - star forest 1525 . unit - data type 1526 . rootdata - buffer to broadcast 1527 - op - operation to use for reduction 1528 1529 Output Parameter: 1530 .
leafdata - buffer to be reduced with values from each leaf's respective root 1531 1532 Level: intermediate 1533 1534 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()` 1535 @*/ 1536 PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1537 { 1538 PetscFunctionBegin; 1539 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1540 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1541 PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op); 1542 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1543 PetscFunctionReturn(PETSC_SUCCESS); 1544 } 1545 1546 /*@C 1547 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()` 1548 1549 Collective 1550 1551 Input Parameters: 1552 + sf - star forest 1553 . unit - data type 1554 . leafdata - values to reduce 1555 - op - reduction operation 1556 1557 Output Parameter: 1558 . rootdata - result of reduction of values from all leaves of each root 1559 1560 Level: intermediate 1561 1562 Note: 1563 When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1564 are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should 1565 use `PetscSFReduceWithMemTypeBegin()` instead. 1566 1567 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()` 1568 @*/ 1569 PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1570 { 1571 PetscMemType rootmtype, leafmtype; 1572 1573 PetscFunctionBegin; 1574 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1575 PetscCall(PetscSFSetUp(sf)); 1576 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1577 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1578 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1579 PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1580 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1581 PetscFunctionReturn(PETSC_SUCCESS); 1582 } 1583 1584 /*@C 1585 PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()` 1586 1587 Collective 1588 1589 Input Parameters: 1590 + sf - star forest 1591 . unit - data type 1592 . leafmtype - memory type of leafdata 1593 . leafdata - values to reduce 1594 . rootmtype - memory type of rootdata 1595 - op - reduction operation 1596 1597 Output Parameter: 1598 .
rootdata - result of reduction of values from all leaves of each root 1599 1600 Level: intermediate 1601 1602 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()` 1603 @*/ 1604 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) 1605 { 1606 PetscFunctionBegin; 1607 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1608 PetscCall(PetscSFSetUp(sf)); 1609 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1610 PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1611 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1612 PetscFunctionReturn(PETSC_SUCCESS); 1613 } 1614 1615 /*@C 1616 PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()` 1617 1618 Collective 1619 1620 Input Parameters: 1621 + sf - star forest 1622 . unit - data type 1623 . leafdata - values to reduce 1624 - op - reduction operation 1625 1626 Output Parameter: 1627 . rootdata - result of reduction of values from all leaves of each root 1628 1629 Level: intermediate 1630 1631 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()` 1632 @*/ 1633 PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1634 { 1635 PetscFunctionBegin; 1636 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1637 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1638 PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op); 1639 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1640 PetscFunctionReturn(PETSC_SUCCESS); 1641 } 1642 1643 /*@C 1644 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, 1645 to be completed with `PetscSFFetchAndOpEnd()` 1646 1647 Collective 1648 1649 Input Parameters: 1650 + sf - star forest 1651 . unit - data type 1652 . leafdata - leaf values to use in reduction 1653 - op - operation to use for reduction 1654 1655 Output Parameters: 1656 + rootdata - root values to be updated, input state is seen by first process to perform an update 1657 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1658 1659 Level: advanced 1660 1661 Note: 1662 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1663 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1664 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1665 integers. 
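   Example:
   A minimal sketch (the buffer names are illustrative and assume contiguous leaf storage, i.e. `ilocal` = `NULL`) in which each leaf atomically adds 1 to an integer counter at its root and receives the counter's previous value\:
.vb
   PetscInt *rootcnt, *leafones, *leafoff; // nroots, nleaves, and nleaves entries, respectively
   // initialize rootcnt[] to 0 and leafones[] to 1 before starting the operation
   PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, rootcnt, leafones, leafoff, MPI_SUM));
   PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, rootcnt, leafones, leafoff, MPI_SUM));
   // afterwards rootcnt[r] equals the degree of root r, and leafoff[i] is the value the counter
   // held at leaf i's root immediately before this leaf's contribution was added
.ve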
1666 1667 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()` 1668 @*/ 1669 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1670 { 1671 PetscMemType rootmtype, leafmtype, leafupdatemtype; 1672 1673 PetscFunctionBegin; 1674 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1675 PetscCall(PetscSFSetUp(sf)); 1676 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1677 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1678 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1679 PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype)); 1680 PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1681 PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 1682 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1683 PetscFunctionReturn(PETSC_SUCCESS); 1684 } 1685 1686 /*@C 1687 PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by 1688 applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()` 1689 1690 Collective 1691 1692 Input Parameters: 1693 + sf - star forest 1694 . unit - data type 1695 . rootmtype - memory type of rootdata 1696 . leafmtype - memory type of leafdata 1697 . leafdata - leaf values to use in reduction 1698 . leafupdatemtype - memory type of leafupdate 1699 - op - operation to use for reduction 1700 1701 Output Parameters: 1702 + rootdata - root values to be updated, input state is seen by first process to perform an update 1703 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1704 1705 Level: advanced 1706 1707 Note: 1708 See `PetscSFFetchAndOpBegin()` for more details. 1709 1710 .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()` 1711 @*/ 1712 PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op) 1713 { 1714 PetscFunctionBegin; 1715 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1716 PetscCall(PetscSFSetUp(sf)); 1717 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1718 PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1719 PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 1720 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1721 PetscFunctionReturn(PETSC_SUCCESS); 1722 } 1723 1724 /*@C 1725 PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()` 1726 to fetch values from roots and update atomically by applying operation using my leaf value 1727 1728 Collective 1729 1730 Input Parameters: 1731 + sf - star forest 1732 . unit - data type 1733 . 
leafdata - leaf values to use in reduction 1734 - op - operation to use for reduction 1735 1736 Output Parameters: 1737 + rootdata - root values to be updated, input state is seen by first process to perform an update 1738 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1739 1740 Level: advanced 1741 1742 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()` 1743 @*/ 1744 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1745 { 1746 PetscFunctionBegin; 1747 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1748 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0)); 1749 PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op); 1750 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0)); 1751 PetscFunctionReturn(PETSC_SUCCESS); 1752 } 1753 1754 /*@C 1755 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()` 1756 1757 Collective 1758 1759 Input Parameter: 1760 . sf - star forest 1761 1762 Output Parameter: 1763 . degree - degree of each root vertex 1764 1765 Level: advanced 1766 1767 Note: 1768 The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it. 1769 1770 .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()` 1771 @*/ 1772 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt **degree) 1773 { 1774 PetscFunctionBegin; 1775 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1776 PetscSFCheckGraphSet(sf, 1); 1777 PetscAssertPointer(degree, 2); 1778 if (!sf->degreeknown) { 1779 PetscInt i, nroots = sf->nroots, maxlocal; 1780 PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1781 maxlocal = sf->maxleaf - sf->minleaf + 1; 1782 PetscCall(PetscMalloc1(nroots, &sf->degree)); 1783 PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1784 for (i = 0; i < nroots; i++) sf->degree[i] = 0; 1785 for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1; 1786 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 1787 } 1788 *degree = NULL; 1789 PetscFunctionReturn(PETSC_SUCCESS); 1790 } 1791 1792 /*@C 1793 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()` 1794 1795 Collective 1796 1797 Input Parameter: 1798 . sf - star forest 1799 1800 Output Parameter: 1801 . degree - degree of each root vertex 1802 1803 Level: developer 1804 1805 Note: 1806 The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it. 
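   Example:
   A minimal sketch of the begin/end pairing (error handling and the surrounding setup are elided)\:
.vb
   const PetscInt *degree;
   PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
   // unrelated work can overlap the communication here
   PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
   // degree[i] now holds the number of leaves connected to root i; the array is owned by sf,
   // so it must not be PetscFree()'d by the caller
.ve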
1807 1808 .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()` 1809 @*/ 1810 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree) 1811 { 1812 PetscFunctionBegin; 1813 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1814 PetscSFCheckGraphSet(sf, 1); 1815 PetscAssertPointer(degree, 2); 1816 if (!sf->degreeknown) { 1817 PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 1818 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 1819 PetscCall(PetscFree(sf->degreetmp)); 1820 sf->degreeknown = PETSC_TRUE; 1821 } 1822 *degree = sf->degree; 1823 PetscFunctionReturn(PETSC_SUCCESS); 1824 } 1825 1826 /*@C 1827 PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`). 1828 Each multi-root is assigned index of the corresponding original root. 1829 1830 Collective 1831 1832 Input Parameters: 1833 + sf - star forest 1834 - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()` 1835 1836 Output Parameters: 1837 + nMultiRoots - (optional) number of multi-roots (roots of multi-`PetscSF`) 1838 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots` 1839 1840 Level: developer 1841 1842 Note: 1843 The returned array `multiRootsOrigNumbering` is newly allocated and should be destroyed with `PetscFree()` when no longer needed. 1844 1845 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()` 1846 @*/ 1847 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1848 { 1849 PetscSF msf; 1850 PetscInt i, j, k, nroots, nmroots; 1851 1852 PetscFunctionBegin; 1853 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1854 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL)); 1855 if (nroots) PetscAssertPointer(degree, 2); 1856 if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3); 1857 PetscAssertPointer(multiRootsOrigNumbering, 4); 1858 PetscCall(PetscSFGetMultiSF(sf, &msf)); 1859 PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL)); 1860 PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering)); 1861 for (i = 0, j = 0, k = 0; i < nroots; i++) { 1862 if (!degree[i]) continue; 1863 for (j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i; 1864 } 1865 PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail"); 1866 if (nMultiRoots) *nMultiRoots = nmroots; 1867 PetscFunctionReturn(PETSC_SUCCESS); 1868 } 1869 1870 /*@C 1871 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()` 1872 1873 Collective 1874 1875 Input Parameters: 1876 + sf - star forest 1877 . unit - data type 1878 - leafdata - leaf data to gather to roots 1879 1880 Output Parameter: 1881 . 
multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1882 1883 Level: intermediate 1884 1885 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()` 1886 @*/ 1887 PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) 1888 { 1889 PetscSF multi = NULL; 1890 1891 PetscFunctionBegin; 1892 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1893 PetscCall(PetscSFSetUp(sf)); 1894 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1895 PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 1896 PetscFunctionReturn(PETSC_SUCCESS); 1897 } 1898 1899 /*@C 1900 PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()` 1901 1902 Collective 1903 1904 Input Parameters: 1905 + sf - star forest 1906 . unit - data type 1907 - leafdata - leaf data to gather to roots 1908 1909 Output Parameter: 1910 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1911 1912 Level: intermediate 1913 1914 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()` 1915 @*/ 1916 PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) 1917 { 1918 PetscSF multi = NULL; 1919 1920 PetscFunctionBegin; 1921 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1922 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1923 PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 1924 PetscFunctionReturn(PETSC_SUCCESS); 1925 } 1926 1927 /*@C 1928 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()` 1929 1930 Collective 1931 1932 Input Parameters: 1933 + sf - star forest 1934 . unit - data type 1935 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1936 1937 Output Parameter: 1938 . leafdata - leaf data to be updated with personal data from each respective root 1939 1940 Level: intermediate 1941 1942 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()` 1943 @*/ 1944 PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) 1945 { 1946 PetscSF multi = NULL; 1947 1948 PetscFunctionBegin; 1949 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1950 PetscCall(PetscSFSetUp(sf)); 1951 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1952 PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE)); 1953 PetscFunctionReturn(PETSC_SUCCESS); 1954 } 1955 1956 /*@C 1957 PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()` 1958 1959 Collective 1960 1961 Input Parameters: 1962 + sf - star forest 1963 . unit - data type 1964 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1965 1966 Output Parameter: 1967 .
leafdata - leaf data to be updated with personal data from each respective root 1968 1969 Level: intermediate 1970 1971 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()` 1972 @*/ 1973 PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) 1974 { 1975 PetscSF multi = NULL; 1976 1977 PetscFunctionBegin; 1978 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1979 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1980 PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE)); 1981 PetscFunctionReturn(PETSC_SUCCESS); 1982 } 1983 1984 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 1985 { 1986 PetscInt i, n, nleaves; 1987 const PetscInt *ilocal = NULL; 1988 PetscHSetI seen; 1989 1990 PetscFunctionBegin; 1991 if (PetscDefined(USE_DEBUG)) { 1992 PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL)); 1993 PetscCall(PetscHSetICreate(&seen)); 1994 for (i = 0; i < nleaves; i++) { 1995 const PetscInt leaf = ilocal ? ilocal[i] : i; 1996 PetscCall(PetscHSetIAdd(seen, leaf)); 1997 } 1998 PetscCall(PetscHSetIGetSize(seen, &n)); 1999 PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique"); 2000 PetscCall(PetscHSetIDestroy(&seen)); 2001 } 2002 PetscFunctionReturn(PETSC_SUCCESS); 2003 } 2004 2005 /*@ 2006 PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view 2007 2008 Input Parameters: 2009 + sfA - The first `PetscSF` 2010 - sfB - The second `PetscSF` 2011 2012 Output Parameter: 2013 . sfBA - The composite `PetscSF` 2014 2015 Level: developer 2016 2017 Notes: 2018 Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star 2019 forests, i.e. the same leaf is not connected with different roots. 2020 2021 `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds 2022 a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected 2023 nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a 2024 `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfB`, on connected nodes.
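   Example:
   A minimal usage sketch (variable names follow the parameters above; error handling elided)\:
.vb
   PetscSF sfBA;
   PetscCall(PetscSFCompose(sfA, sfB, &sfBA)); // sfBA has sfA's roots and sfB's leaves
   // a broadcast of root data through sfBA delivers, on the connected leaves of sfB, the same
   // values as broadcasting through sfA and then broadcasting the received leaf data through sfB
   PetscCall(PetscSFDestroy(&sfBA));
.ve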
2025 2026 .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()` 2027 @*/ 2028 PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA) 2029 { 2030 const PetscSFNode *remotePointsA, *remotePointsB; 2031 PetscSFNode *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB; 2032 const PetscInt *localPointsA, *localPointsB; 2033 PetscInt *localPointsBA; 2034 PetscInt i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA; 2035 PetscBool denseB; 2036 2037 PetscFunctionBegin; 2038 PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1); 2039 PetscSFCheckGraphSet(sfA, 1); 2040 PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2); 2041 PetscSFCheckGraphSet(sfB, 2); 2042 PetscCheckSameComm(sfA, 1, sfB, 2); 2043 PetscAssertPointer(sfBA, 3); 2044 PetscCall(PetscSFCheckLeavesUnique_Private(sfA)); 2045 PetscCall(PetscSFCheckLeavesUnique_Private(sfB)); 2046 2047 PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA)); 2048 PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB)); 2049 /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size 2050 numRootsB; otherwise, garbage will be broadcast. 2051 Example (comm size = 1): 2052 sfA: 0 <- (0, 0) 2053 sfB: 100 <- (0, 0) 2054 101 <- (0, 1) 2055 Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target 2056 of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would 2057 receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on 2058 remotePointsA; if not recast, point 101 would receive a garbage value. */ 2059 PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA)); 2060 for (i = 0; i < numRootsB; i++) { 2061 reorderedRemotePointsA[i].rank = -1; 2062 reorderedRemotePointsA[i].index = -1; 2063 } 2064 for (i = 0; i < numLeavesA; i++) { 2065 PetscInt localp = localPointsA ? localPointsA[i] : i; 2066 2067 if (localp >= numRootsB) continue; 2068 reorderedRemotePointsA[localp] = remotePointsA[i]; 2069 } 2070 remotePointsA = reorderedRemotePointsA; 2071 PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf)); 2072 PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB)); 2073 for (i = 0; i < maxleaf - minleaf + 1; i++) { 2074 leafdataB[i].rank = -1; 2075 leafdataB[i].index = -1; 2076 } 2077 PetscCall(PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE)); 2078 PetscCall(PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE)); 2079 PetscCall(PetscFree(reorderedRemotePointsA)); 2080 2081 denseB = (PetscBool)!localPointsB; 2082 for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) { 2083 if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE; 2084 else numLeavesBA++; 2085 } 2086 if (denseB) { 2087 localPointsBA = NULL; 2088 remotePointsBA = leafdataB; 2089 } else { 2090 PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA)); 2091 PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA)); 2092 for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) { 2093 const PetscInt l = localPointsB ?
localPointsB[i] : i; 2094 2095 if (leafdataB[l - minleaf].rank == -1) continue; 2096 remotePointsBA[numLeavesBA] = leafdataB[l - minleaf]; 2097 localPointsBA[numLeavesBA] = l; 2098 numLeavesBA++; 2099 } 2100 PetscCall(PetscFree(leafdataB)); 2101 } 2102 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA)); 2103 PetscCall(PetscSFSetFromOptions(*sfBA)); 2104 PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER)); 2105 PetscFunctionReturn(PETSC_SUCCESS); 2106 } 2107 2108 /*@ 2109 PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one 2110 2111 Input Parameters: 2112 + sfA - The first `PetscSF` 2113 - sfB - The second `PetscSF` 2114 2115 Output Parameter: 2116 . sfBA - The composite `PetscSF`. 2117 2118 Level: developer 2119 2120 Notes: 2121 Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star 2122 forests, i.e. the same leaf is not connected with different roots. Moreover, all roots of the 2123 second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected. 2124 2125 `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds 2126 a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected 2127 roots are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` 2128 on `sfA`, then 2129 a `PetscSFReduceBegin()`/`PetscSFReduceEnd()` on `sfB`, on connected roots. 2130 2131 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()` 2132 @*/ 2133 PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA) 2134 { 2135 const PetscSFNode *remotePointsA, *remotePointsB; 2136 PetscSFNode *remotePointsBA; 2137 const PetscInt *localPointsA, *localPointsB; 2138 PetscSFNode *reorderedRemotePointsA = NULL; 2139 PetscInt i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA; 2140 MPI_Op op; 2141 #if defined(PETSC_USE_64BIT_INDICES) 2142 PetscBool iswin; 2143 #endif 2144 2145 PetscFunctionBegin; 2146 PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1); 2147 PetscSFCheckGraphSet(sfA, 1); 2148 PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2); 2149 PetscSFCheckGraphSet(sfB, 2); 2150 PetscCheckSameComm(sfA, 1, sfB, 2); 2151 PetscAssertPointer(sfBA, 3); 2152 PetscCall(PetscSFCheckLeavesUnique_Private(sfA)); 2153 PetscCall(PetscSFCheckLeavesUnique_Private(sfB)); 2154 2155 PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA)); 2156 PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB)); 2157 2158 /* TODO: Check roots of sfB have degree of 1 */ 2159 /* Once we implement it, we can replace the MPI_MAXLOC 2160 with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect. 2161 We use MPI_MAXLOC only to have a deterministic output from this routine if 2162 the root condition is not met.
2163 */ 2164 op = MPI_MAXLOC; 2165 #if defined(PETSC_USE_64BIT_INDICES) 2166 /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */ 2167 PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin)); 2168 if (iswin) op = MPI_REPLACE; 2169 #endif 2170 2171 PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf)); 2172 PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA)); 2173 for (i = 0; i < maxleaf - minleaf + 1; i++) { 2174 reorderedRemotePointsA[i].rank = -1; 2175 reorderedRemotePointsA[i].index = -1; 2176 } 2177 if (localPointsA) { 2178 for (i = 0; i < numLeavesA; i++) { 2179 if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; 2180 reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i]; 2181 } 2182 } else { 2183 for (i = 0; i < numLeavesA; i++) { 2184 if (i > maxleaf || i < minleaf) continue; 2185 reorderedRemotePointsA[i - minleaf] = remotePointsA[i]; 2186 } 2187 } 2188 2189 PetscCall(PetscMalloc1(numRootsB, &localPointsBA)); 2190 PetscCall(PetscMalloc1(numRootsB, &remotePointsBA)); 2191 for (i = 0; i < numRootsB; i++) { 2192 remotePointsBA[i].rank = -1; 2193 remotePointsBA[i].index = -1; 2194 } 2195 2196 PetscCall(PetscSFReduceBegin(sfB, MPIU_2INT, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op)); 2197 PetscCall(PetscSFReduceEnd(sfB, MPIU_2INT, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op)); 2198 PetscCall(PetscFree(reorderedRemotePointsA)); 2199 for (i = 0, numLeavesBA = 0; i < numRootsB; i++) { 2200 if (remotePointsBA[i].rank == -1) continue; 2201 remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank; 2202 remotePointsBA[numLeavesBA].index = remotePointsBA[i].index; 2203 localPointsBA[numLeavesBA] = i; 2204 numLeavesBA++; 2205 } 2206 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA)); 2207 PetscCall(PetscSFSetFromOptions(*sfBA)); 2208 PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER)); 2209 PetscFunctionReturn(PETSC_SUCCESS); 2210 } 2211 2212 /* 2213 PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF` 2214 2215 Input Parameter: 2216 . sf - The global `PetscSF` 2217 2218 Output Parameter: 2219 . 
out - The local `PetscSF` 2220 2221 .seealso: `PetscSF`, `PetscSFCreate()` 2222 */ 2223 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out) 2224 { 2225 MPI_Comm comm; 2226 PetscMPIInt myrank; 2227 const PetscInt *ilocal; 2228 const PetscSFNode *iremote; 2229 PetscInt i, j, nroots, nleaves, lnleaves, *lilocal; 2230 PetscSFNode *liremote; 2231 PetscSF lsf; 2232 2233 PetscFunctionBegin; 2234 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2235 if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out); 2236 else { 2237 /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */ 2238 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 2239 PetscCallMPI(MPI_Comm_rank(comm, &myrank)); 2240 2241 /* Find out local edges and build a local SF */ 2242 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 2243 for (i = lnleaves = 0; i < nleaves; i++) { 2244 if (iremote[i].rank == (PetscInt)myrank) lnleaves++; 2245 } 2246 PetscCall(PetscMalloc1(lnleaves, &lilocal)); 2247 PetscCall(PetscMalloc1(lnleaves, &liremote)); 2248 2249 for (i = j = 0; i < nleaves; i++) { 2250 if (iremote[i].rank == (PetscInt)myrank) { 2251 lilocal[j] = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */ 2252 liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */ 2253 liremote[j].index = iremote[i].index; 2254 j++; 2255 } 2256 } 2257 PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf)); 2258 PetscCall(PetscSFSetFromOptions(lsf)); 2259 PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER)); 2260 PetscCall(PetscSFSetUp(lsf)); 2261 *out = lsf; 2262 } 2263 PetscFunctionReturn(PETSC_SUCCESS); 2264 } 2265 2266 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */ 2267 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata) 2268 { 2269 PetscMemType rootmtype, leafmtype; 2270 2271 PetscFunctionBegin; 2272 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2273 PetscCall(PetscSFSetUp(sf)); 2274 PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 2275 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 2276 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 2277 PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata); 2278 PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 2279 PetscFunctionReturn(PETSC_SUCCESS); 2280 } 2281 2282 /*@ 2283 PetscSFConcatenate - concatenate multiple `PetscSF` into one 2284 2285 Input Parameters: 2286 + comm - the communicator 2287 . nsfs - the number of input `PetscSF` 2288 . sfs - the array of input `PetscSF` 2289 . rootMode - the root mode specifying how roots are handled 2290 - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage 2291 2292 Output Parameter: 2293 . newsf - The resulting `PetscSF` 2294 2295 Level: advanced 2296 2297 Notes: 2298 The communicator of all `PetscSF`s in `sfs` must be comm. 2299 2300 Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order. 2301 2302 The offsets in `leafOffsets` are added to the original leaf indices. 2303 2304 If all input SFs use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well. 2305 In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`. 
2306 2307 If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s. 2308 In this case, the user is responsible for providing correct offsets so that the resulting leaves are unique (otherwise an error occurs). 2309 2310 All root modes retain the essential connectivity condition. 2311 If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`. 2312 Parameter `rootMode` controls how the input root spaces are combined. 2313 For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode) 2314 and is also the same in the output `PetscSF`. 2315 For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined. 2316 `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally; 2317 roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input `PetscSF` and original local index, and renumbered contiguously. 2318 `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally; 2319 roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously; 2320 the original root ranks are ignored. 2321 For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, 2322 the output `PetscSF`'s root layout is such that the local number of roots is the sum of the input `PetscSF`'s local numbers of roots on each rank, 2323 to preserve load balancing. 2324 However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different ranks. 2325 2326 Example: 2327 We can use src/vec/is/sf/tests/ex18.c to compare the root modes.
By running 2328 .vb 2329 make -C $PETSC_DIR/src/vec/is/sf/tests ex18 2330 for m in {local,global,shared}; do 2331 mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view 2332 done 2333 .ve 2334 we generate two identical `PetscSF`s sf_0 and sf_1, 2335 .vb 2336 PetscSF Object: sf_0 2 MPI processes 2337 type: basic 2338 rank #leaves #roots 2339 [ 0] 4 2 2340 [ 1] 4 2 2341 leaves roots roots in global numbering 2342 ( 0, 0) <- ( 0, 0) = 0 2343 ( 0, 1) <- ( 0, 1) = 1 2344 ( 0, 2) <- ( 1, 0) = 2 2345 ( 0, 3) <- ( 1, 1) = 3 2346 ( 1, 0) <- ( 0, 0) = 0 2347 ( 1, 1) <- ( 0, 1) = 1 2348 ( 1, 2) <- ( 1, 0) = 2 2349 ( 1, 3) <- ( 1, 1) = 3 2350 .ve 2351 and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\: 2352 .vb 2353 rootMode = local: 2354 PetscSF Object: result_sf 2 MPI processes 2355 type: basic 2356 rank #leaves #roots 2357 [ 0] 8 4 2358 [ 1] 8 4 2359 leaves roots roots in global numbering 2360 ( 0, 0) <- ( 0, 0) = 0 2361 ( 0, 1) <- ( 0, 1) = 1 2362 ( 0, 2) <- ( 1, 0) = 4 2363 ( 0, 3) <- ( 1, 1) = 5 2364 ( 0, 4) <- ( 0, 2) = 2 2365 ( 0, 5) <- ( 0, 3) = 3 2366 ( 0, 6) <- ( 1, 2) = 6 2367 ( 0, 7) <- ( 1, 3) = 7 2368 ( 1, 0) <- ( 0, 0) = 0 2369 ( 1, 1) <- ( 0, 1) = 1 2370 ( 1, 2) <- ( 1, 0) = 4 2371 ( 1, 3) <- ( 1, 1) = 5 2372 ( 1, 4) <- ( 0, 2) = 2 2373 ( 1, 5) <- ( 0, 3) = 3 2374 ( 1, 6) <- ( 1, 2) = 6 2375 ( 1, 7) <- ( 1, 3) = 7 2376 2377 rootMode = global: 2378 PetscSF Object: result_sf 2 MPI processes 2379 type: basic 2380 rank #leaves #roots 2381 [ 0] 8 4 2382 [ 1] 8 4 2383 leaves roots roots in global numbering 2384 ( 0, 0) <- ( 0, 0) = 0 2385 ( 0, 1) <- ( 0, 1) = 1 2386 ( 0, 2) <- ( 0, 2) = 2 2387 ( 0, 3) <- ( 0, 3) = 3 2388 ( 0, 4) <- ( 1, 0) = 4 2389 ( 0, 5) <- ( 1, 1) = 5 2390 ( 0, 6) <- ( 1, 2) = 6 2391 ( 0, 7) <- ( 1, 3) = 7 2392 ( 1, 0) <- ( 0, 0) = 0 2393 ( 1, 1) <- ( 0, 1) = 1 2394 ( 1, 2) <- ( 0, 2) = 2 2395 ( 1, 3) <- ( 0, 3) = 3 2396 ( 1, 4) <- ( 1, 0) = 4 2397 ( 1, 5) <- ( 1, 1) = 5 2398 ( 1, 6) <- ( 1, 2) = 6 2399 ( 1, 7) <- ( 1, 3) = 7 2400 2401 rootMode = shared: 2402 PetscSF Object: result_sf 2 MPI processes 2403 type: basic 2404 rank #leaves #roots 2405 [ 0] 8 2 2406 [ 1] 8 2 2407 leaves roots roots in global numbering 2408 ( 0, 0) <- ( 0, 0) = 0 2409 ( 0, 1) <- ( 0, 1) = 1 2410 ( 0, 2) <- ( 1, 0) = 2 2411 ( 0, 3) <- ( 1, 1) = 3 2412 ( 0, 4) <- ( 0, 0) = 0 2413 ( 0, 5) <- ( 0, 1) = 1 2414 ( 0, 6) <- ( 1, 0) = 2 2415 ( 0, 7) <- ( 1, 1) = 3 2416 ( 1, 0) <- ( 0, 0) = 0 2417 ( 1, 1) <- ( 0, 1) = 1 2418 ( 1, 2) <- ( 1, 0) = 2 2419 ( 1, 3) <- ( 1, 1) = 3 2420 ( 1, 4) <- ( 0, 0) = 0 2421 ( 1, 5) <- ( 0, 1) = 1 2422 ( 1, 6) <- ( 1, 0) = 2 2423 ( 1, 7) <- ( 1, 1) = 3 2424 .ve 2425 2426 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode` 2427 @*/ 2428 PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf) 2429 { 2430 PetscInt i, s, nLeaves, nRoots; 2431 PetscInt *leafArrayOffsets; 2432 PetscInt *ilocal_new; 2433 PetscSFNode *iremote_new; 2434 PetscBool all_ilocal_null = PETSC_FALSE; 2435 PetscLayout glayout = NULL; 2436 PetscInt *gremote = NULL; 2437 PetscMPIInt rank, size; 2438 2439 PetscFunctionBegin; 2440 if (PetscDefined(USE_DEBUG)) { 2441 PetscSF dummy; /* just to have a PetscObject on comm for input validation */ 2442 2443 PetscCall(PetscSFCreate(comm, &dummy)); 2444 PetscValidLogicalCollectiveInt(dummy, nsfs, 2); 2445 
PetscAssertPointer(sfs, 3); 2446 for (i = 0; i < nsfs; i++) { 2447 PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3); 2448 PetscCheckSameComm(dummy, 1, sfs[i], 3); 2449 } 2450 PetscValidLogicalCollectiveEnum(dummy, rootMode, 4); 2451 if (leafOffsets) PetscAssertPointer(leafOffsets, 5); 2452 PetscAssertPointer(newsf, 6); 2453 PetscCall(PetscSFDestroy(&dummy)); 2454 } 2455 if (!nsfs) { 2456 PetscCall(PetscSFCreate(comm, newsf)); 2457 PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 2458 PetscFunctionReturn(PETSC_SUCCESS); 2459 } 2460 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2461 PetscCallMPI(MPI_Comm_size(comm, &size)); 2462 2463 /* Calculate leaf array offsets */ 2464 PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets)); 2465 leafArrayOffsets[0] = 0; 2466 for (s = 0; s < nsfs; s++) { 2467 PetscInt nl; 2468 2469 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL)); 2470 leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl; 2471 } 2472 nLeaves = leafArrayOffsets[nsfs]; 2473 2474 /* Calculate number of roots */ 2475 switch (rootMode) { 2476 case PETSCSF_CONCATENATE_ROOTMODE_SHARED: { 2477 PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL)); 2478 if (PetscDefined(USE_DEBUG)) { 2479 for (s = 1; s < nsfs; s++) { 2480 PetscInt nr; 2481 2482 PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL)); 2483 PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots); 2484 } 2485 } 2486 } break; 2487 case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: { 2488 /* Calculate also global layout in this case */ 2489 PetscInt *nls; 2490 PetscLayout *lts; 2491 PetscInt **inds; 2492 PetscInt j; 2493 PetscInt rootOffset = 0; 2494 2495 PetscCall(PetscCalloc3(nsfs, &lts, nsfs, &nls, nsfs, &inds)); 2496 PetscCall(PetscLayoutCreate(comm, &glayout)); 2497 glayout->bs = 1; 2498 glayout->n = 0; 2499 glayout->N = 0; 2500 for (s = 0; s < nsfs; s++) { 2501 PetscCall(PetscSFGetGraphLayout(sfs[s], &lts[s], &nls[s], NULL, &inds[s])); 2502 glayout->n += lts[s]->n; 2503 glayout->N += lts[s]->N; 2504 } 2505 PetscCall(PetscLayoutSetUp(glayout)); 2506 PetscCall(PetscMalloc1(nLeaves, &gremote)); 2507 for (s = 0, j = 0; s < nsfs; s++) { 2508 for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset; 2509 rootOffset += lts[s]->N; 2510 PetscCall(PetscLayoutDestroy(&lts[s])); 2511 PetscCall(PetscFree(inds[s])); 2512 } 2513 PetscCall(PetscFree3(lts, nls, inds)); 2514 nRoots = glayout->N; 2515 } break; 2516 case PETSCSF_CONCATENATE_ROOTMODE_LOCAL: 2517 /* nRoots calculated later in this case */ 2518 break; 2519 default: 2520 SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode); 2521 } 2522 2523 if (!leafOffsets) { 2524 all_ilocal_null = PETSC_TRUE; 2525 for (s = 0; s < nsfs; s++) { 2526 const PetscInt *ilocal; 2527 2528 PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL)); 2529 if (ilocal) { 2530 all_ilocal_null = PETSC_FALSE; 2531 break; 2532 } 2533 } 2534 PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL"); 2535 } 2536 2537 /* Renumber and concatenate local leaves */ 2538 ilocal_new = NULL; 2539 if (!all_ilocal_null) { 2540 PetscCall(PetscMalloc1(nLeaves, &ilocal_new)); 2541 for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1; 2542 for (s = 0; s < nsfs; s++) { 2543 const PetscInt *ilocal;
2544 PetscInt *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]); 2545 PetscInt i, nleaves_l; 2546 2547 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL)); 2548 for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s]; 2549 } 2550 } 2551 2552 /* Renumber and concatenate remote roots */ 2553 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) { 2554 PetscInt rootOffset = 0; 2555 2556 PetscCall(PetscMalloc1(nLeaves, &iremote_new)); 2557 for (i = 0; i < nLeaves; i++) { 2558 iremote_new[i].rank = -1; 2559 iremote_new[i].index = -1; 2560 } 2561 for (s = 0; s < nsfs; s++) { 2562 PetscInt i, nl, nr; 2563 PetscSF tmp_sf; 2564 const PetscSFNode *iremote; 2565 PetscSFNode *tmp_rootdata; 2566 PetscSFNode *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]); 2567 2568 PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote)); 2569 PetscCall(PetscSFCreate(comm, &tmp_sf)); 2570 /* create helper SF with contiguous leaves */ 2571 PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 2572 PetscCall(PetscSFSetUp(tmp_sf)); 2573 PetscCall(PetscMalloc1(nr, &tmp_rootdata)); 2574 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) { 2575 for (i = 0; i < nr; i++) { 2576 tmp_rootdata[i].index = i + rootOffset; 2577 tmp_rootdata[i].rank = (PetscInt)rank; 2578 } 2579 rootOffset += nr; 2580 } else { 2581 for (i = 0; i < nr; i++) { 2582 tmp_rootdata[i].index = i; 2583 tmp_rootdata[i].rank = (PetscInt)rank; 2584 } 2585 } 2586 PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2587 PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2588 PetscCall(PetscSFDestroy(&tmp_sf)); 2589 PetscCall(PetscFree(tmp_rootdata)); 2590 } 2591 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above 2592 2593 /* Build the new SF */ 2594 PetscCall(PetscSFCreate(comm, newsf)); 2595 PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER)); 2596 } else { 2597 /* Build the new SF */ 2598 PetscCall(PetscSFCreate(comm, newsf)); 2599 PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote)); 2600 } 2601 PetscCall(PetscSFSetUp(*newsf)); 2602 PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view")); 2603 PetscCall(PetscLayoutDestroy(&glayout)); 2604 PetscCall(PetscFree(gremote)); 2605 PetscCall(PetscFree(leafArrayOffsets)); 2606 PetscFunctionReturn(PETSC_SUCCESS); 2607 } 2608