1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/hashseti.h> 3 #include <petsc/private/viewerimpl.h> 4 #include <petsc/private/hashmapi.h> 5 6 #if defined(PETSC_HAVE_CUDA) 7 #include <cuda_runtime.h> 8 #include <petscdevice_cuda.h> 9 #endif 10 11 #if defined(PETSC_HAVE_HIP) 12 #include <hip/hip_runtime.h> 13 #endif 14 15 #if defined(PETSC_CLANG_STATIC_ANALYZER) 16 extern void PetscSFCheckGraphSet(PetscSF, int); 17 #else 18 #if defined(PETSC_USE_DEBUG) 19 #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME) 20 #else 21 #define PetscSFCheckGraphSet(sf, arg) \ 22 do { \ 23 } while (0) 24 #endif 25 #endif 26 27 const char *const PetscSFDuplicateOptions[] = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL}; 28 const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL}; 29 30 /*@ 31 PetscSFCreate - create a star forest communication context 32 33 Collective 34 35 Input Parameter: 36 . comm - communicator on which the star forest will operate 37 38 Output Parameter: 39 . sf - new star forest context 40 41 Options Database Key: 42 . -sf_type type - value of type may be 43 .vb 44 basic -Use MPI persistent Isend/Irecv for communication (Default) 45 window -Use MPI-3 one-sided window for communication 46 neighbor -Use MPI-3 neighborhood collectives for communication 47 .ve 48 49 Level: intermediate 50 51 Note: 52 When one knows the communication graph is one of the predefined graph, such as `MPI_Alltoall()`, `MPI_Allgatherv()`, 53 `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special 54 `SF`s are optimized and they have better performance than the general `SF`s. 
55 56 .seealso: `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()` 57 @*/ 58 PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf) 59 { 60 PetscSF b; 61 62 PetscFunctionBegin; 63 PetscAssertPointer(sf, 2); 64 PetscCall(PetscSFInitializePackage()); 65 66 PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView)); 67 68 b->nroots = -1; 69 b->nleaves = -1; 70 b->minleaf = PETSC_MAX_INT; 71 b->maxleaf = PETSC_MIN_INT; 72 b->nranks = -1; 73 b->rankorder = PETSC_TRUE; 74 b->ingroup = MPI_GROUP_NULL; 75 b->outgroup = MPI_GROUP_NULL; 76 b->graphset = PETSC_FALSE; 77 #if defined(PETSC_HAVE_DEVICE) 78 b->use_gpu_aware_mpi = use_gpu_aware_mpi; 79 b->use_stream_aware_mpi = PETSC_FALSE; 80 b->unknown_input_stream = PETSC_FALSE; 81 #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 82 b->backend = PETSCSF_BACKEND_KOKKOS; 83 #elif defined(PETSC_HAVE_CUDA) 84 b->backend = PETSCSF_BACKEND_CUDA; 85 #elif defined(PETSC_HAVE_HIP) 86 b->backend = PETSCSF_BACKEND_HIP; 87 #endif 88 89 #if defined(PETSC_HAVE_NVSHMEM) 90 b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */ 91 b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */ 92 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL)); 93 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL)); 94 #endif 95 #endif 96 b->vscat.from_n = -1; 97 b->vscat.to_n = -1; 98 b->vscat.unit = MPIU_SCALAR; 99 *sf = b; 100 PetscFunctionReturn(PETSC_SUCCESS); 101 } 102 103 /*@ 104 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 105 106 Collective 107 108 Input Parameter: 109 . 
sf - star forest 110 111 Level: advanced 112 113 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()` 114 @*/ 115 PetscErrorCode PetscSFReset(PetscSF sf) 116 { 117 PetscFunctionBegin; 118 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 119 PetscTryTypeMethod(sf, Reset); 120 sf->nroots = -1; 121 sf->nleaves = -1; 122 sf->minleaf = PETSC_MAX_INT; 123 sf->maxleaf = PETSC_MIN_INT; 124 sf->mine = NULL; 125 sf->remote = NULL; 126 sf->graphset = PETSC_FALSE; 127 PetscCall(PetscFree(sf->mine_alloc)); 128 PetscCall(PetscFree(sf->remote_alloc)); 129 sf->nranks = -1; 130 PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote)); 131 sf->degreeknown = PETSC_FALSE; 132 PetscCall(PetscFree(sf->degree)); 133 if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup)); 134 if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup)); 135 if (sf->multi) sf->multi->multi = NULL; 136 PetscCall(PetscSFDestroy(&sf->multi)); 137 PetscCall(PetscLayoutDestroy(&sf->map)); 138 139 #if defined(PETSC_HAVE_DEVICE) 140 for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i])); 141 #endif 142 143 sf->setupcalled = PETSC_FALSE; 144 PetscFunctionReturn(PETSC_SUCCESS); 145 } 146 147 /*@C 148 PetscSFSetType - Set the `PetscSF` communication implementation 149 150 Collective 151 152 Input Parameters: 153 + sf - the `PetscSF` context 154 - type - a known method 155 .vb 156 PETSCSFWINDOW - MPI-2/3 one-sided 157 PETSCSFBASIC - basic implementation using MPI-1 two-sided 158 .ve 159 160 Options Database Key: 161 . 
-sf_type <type> - Sets the method; for example `basic` or `window` use -help for a list of available methods 162 163 Level: intermediate 164 165 Notes: 166 See `PetscSFType` for possible values 167 168 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()` 169 @*/ 170 PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type) 171 { 172 PetscBool match; 173 PetscErrorCode (*r)(PetscSF); 174 175 PetscFunctionBegin; 176 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 177 PetscAssertPointer(type, 2); 178 179 PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match)); 180 if (match) PetscFunctionReturn(PETSC_SUCCESS); 181 182 PetscCall(PetscFunctionListFind(PetscSFList, type, &r)); 183 PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type); 184 /* Destroy the previous PetscSF implementation context */ 185 PetscTryTypeMethod(sf, Destroy); 186 PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops))); 187 PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type)); 188 PetscCall((*r)(sf)); 189 PetscFunctionReturn(PETSC_SUCCESS); 190 } 191 192 /*@C 193 PetscSFGetType - Get the `PetscSF` communication implementation 194 195 Not Collective 196 197 Input Parameter: 198 . sf - the `PetscSF` context 199 200 Output Parameter: 201 . type - the `PetscSF` type name 202 203 Level: intermediate 204 205 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()` 206 @*/ 207 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 208 { 209 PetscFunctionBegin; 210 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 211 PetscAssertPointer(type, 2); 212 *type = ((PetscObject)sf)->type_name; 213 PetscFunctionReturn(PETSC_SUCCESS); 214 } 215 216 /*@C 217 PetscSFDestroy - destroy a star forest 218 219 Collective 220 221 Input Parameter: 222 . 
sf - address of star forest 223 224 Level: intermediate 225 226 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()` 227 @*/ 228 PetscErrorCode PetscSFDestroy(PetscSF *sf) 229 { 230 PetscFunctionBegin; 231 if (!*sf) PetscFunctionReturn(PETSC_SUCCESS); 232 PetscValidHeaderSpecific((*sf), PETSCSF_CLASSID, 1); 233 if (--((PetscObject)(*sf))->refct > 0) { 234 *sf = NULL; 235 PetscFunctionReturn(PETSC_SUCCESS); 236 } 237 PetscCall(PetscSFReset(*sf)); 238 PetscTryTypeMethod((*sf), Destroy); 239 PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf)); 240 if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit)); 241 #if defined(PETSC_HAVE_MPIX_STREAM) 242 if ((*sf)->use_stream_aware_mpi) { 243 PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream)); 244 PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm)); 245 } 246 #endif 247 PetscCall(PetscHeaderDestroy(sf)); 248 PetscFunctionReturn(PETSC_SUCCESS); 249 } 250 251 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 252 { 253 PetscInt i, nleaves; 254 PetscMPIInt size; 255 const PetscInt *ilocal; 256 const PetscSFNode *iremote; 257 258 PetscFunctionBegin; 259 if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS); 260 PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote)); 261 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 262 for (i = 0; i < nleaves; i++) { 263 const PetscInt rank = iremote[i].rank; 264 const PetscInt remote = iremote[i].index; 265 const PetscInt leaf = ilocal ? 
ilocal[i] : i; 266 PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size); 267 PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i); 268 PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i); 269 } 270 PetscFunctionReturn(PETSC_SUCCESS); 271 } 272 273 /*@ 274 PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication 275 276 Collective 277 278 Input Parameter: 279 . sf - star forest communication object 280 281 Level: beginner 282 283 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()` 284 @*/ 285 PetscErrorCode PetscSFSetUp(PetscSF sf) 286 { 287 PetscFunctionBegin; 288 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 289 PetscSFCheckGraphSet(sf, 1); 290 if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS); 291 PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0)); 292 PetscCall(PetscSFCheckGraphValid_Private(sf)); 293 if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */ 294 PetscTryTypeMethod(sf, SetUp); 295 #if defined(PETSC_HAVE_CUDA) 296 if (sf->backend == PETSCSF_BACKEND_CUDA) { 297 sf->ops->Malloc = PetscSFMalloc_CUDA; 298 sf->ops->Free = PetscSFFree_CUDA; 299 } 300 #endif 301 #if defined(PETSC_HAVE_HIP) 302 if (sf->backend == PETSCSF_BACKEND_HIP) { 303 sf->ops->Malloc = PetscSFMalloc_HIP; 304 sf->ops->Free = PetscSFFree_HIP; 305 } 306 #endif 307 308 #if defined(PETSC_HAVE_KOKKOS) 309 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 310 sf->ops->Malloc = PetscSFMalloc_Kokkos; 311 sf->ops->Free = PetscSFFree_Kokkos; 312 } 313 #endif 314 
PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0)); 315 sf->setupcalled = PETSC_TRUE; 316 PetscFunctionReturn(PETSC_SUCCESS); 317 } 318 319 /*@ 320 PetscSFSetFromOptions - set `PetscSF` options using the options database 321 322 Logically Collective 323 324 Input Parameter: 325 . sf - star forest 326 327 Options Database Keys: 328 + -sf_type - implementation type, see `PetscSFSetType()` 329 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 330 . -sf_use_default_stream - Assume callers of `PetscSF` computed the input root/leafdata with the default CUDA stream. `PetscSF` will also 331 use the default stream to process data. Therefore, no stream synchronization is needed between `PetscSF` and its caller (default: true). 332 If true, this option only works with `-use_gpu_aware_mpi 1`. 333 . -sf_use_stream_aware_mpi - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false). 334 If true, this option only works with `-use_gpu_aware_mpi 1`. 335 336 - -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently `PetscSF` has these backends: cuda - hip and Kokkos. 337 On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices, 338 the only available is kokkos. 339 340 Level: intermediate 341 342 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()` 343 @*/ 344 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 345 { 346 PetscSFType deft; 347 char type[256]; 348 PetscBool flg; 349 350 PetscFunctionBegin; 351 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 352 PetscObjectOptionsBegin((PetscObject)sf); 353 deft = ((PetscObject)sf)->type_name ? 
((PetscObject)sf)->type_name : PETSCSFBASIC; 354 PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg)); 355 PetscCall(PetscSFSetType(sf, flg ? type : deft)); 356 PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL)); 357 #if defined(PETSC_HAVE_DEVICE) 358 { 359 char backendstr[32] = {0}; 360 PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set; 361 /* Change the defaults set in PetscSFCreate() with command line options */ 362 PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL)); 363 PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL)); 364 PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set)); 365 PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda)); 366 PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos)); 367 PetscCall(PetscStrcasecmp("hip", backendstr, &isHip)); 368 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 369 if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 370 else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 371 else if (isHip) sf->backend = PETSCSF_BACKEND_HIP; 372 else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr); 373 #elif defined(PETSC_HAVE_KOKKOS) 374 PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. 
You can only choose kokkos", backendstr); 375 #endif 376 377 #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM) 378 if (sf->use_stream_aware_mpi) { 379 MPI_Info info; 380 381 PetscCallMPI(MPI_Info_create(&info)); 382 PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t")); 383 PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream))); 384 PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream)); 385 PetscCallMPI(MPI_Info_free(&info)); 386 PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm)); 387 } 388 #endif 389 } 390 #endif 391 PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject); 392 PetscOptionsEnd(); 393 PetscFunctionReturn(PETSC_SUCCESS); 394 } 395 396 /*@ 397 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 398 399 Logically Collective 400 401 Input Parameters: 402 + sf - star forest 403 - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic) 404 405 Level: advanced 406 407 .seealso: `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()` 408 @*/ 409 PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg) 410 { 411 PetscFunctionBegin; 412 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 413 PetscValidLogicalCollectiveBool(sf, flg, 2); 414 PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 415 sf->rankorder = flg; 416 PetscFunctionReturn(PETSC_SUCCESS); 417 } 418 419 /*@C 420 PetscSFSetGraph - Set a parallel star forest 421 422 Collective 423 424 Input Parameters: 425 + sf - star forest 426 . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 427 . 
nleaves - number of leaf vertices on the current process, each of these references a root on any process 428 . ilocal - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage (locations must be >= 0, enforced 429 during setup in debug mode) 430 . localmode - copy mode for `ilocal` 431 . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 432 during setup in debug mode) 433 - remotemode - copy mode for `iremote` 434 435 Level: intermediate 436 437 Notes: 438 Leaf indices in `ilocal` must be unique, otherwise an error occurs. 439 440 Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics. 441 In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`, 442 PETSc might modify the respective array; 443 if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`. 444 Only if `PETSC_COPY_VALUES` is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed). 445 446 Fortran Notes: 447 In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`. 448 449 Developer Notes: 450 We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf. 451 This also allows to compare leaf sets of two `PetscSF`s easily. 
452 453 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()` 454 @*/ 455 PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode) 456 { 457 PetscBool unique, contiguous; 458 459 PetscFunctionBegin; 460 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 461 if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4); 462 if (nleaves > 0) PetscAssertPointer(iremote, 6); 463 PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots); 464 PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves); 465 /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast 466 * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */ 467 PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode); 468 PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode); 469 470 if (sf->nroots >= 0) { /* Reset only if graph already set */ 471 PetscCall(PetscSFReset(sf)); 472 } 473 474 PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0)); 475 476 sf->nroots = nroots; 477 sf->nleaves = nleaves; 478 479 if (localmode == PETSC_COPY_VALUES && ilocal) { 480 PetscInt *tlocal = NULL; 481 482 PetscCall(PetscMalloc1(nleaves, &tlocal)); 483 PetscCall(PetscArraycpy(tlocal, ilocal, nleaves)); 484 ilocal = tlocal; 485 } 486 if (remotemode == PETSC_COPY_VALUES) { 487 PetscSFNode *tremote = NULL; 488 489 PetscCall(PetscMalloc1(nleaves, &tremote)); 490 PetscCall(PetscArraycpy(tremote, iremote, nleaves)); 491 iremote = tremote; 492 } 493 494 if (nleaves && ilocal) { 495 
PetscSFNode work; 496 497 PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work)); 498 PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique)); 499 unique = PetscNot(unique); 500 PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF"); 501 sf->minleaf = ilocal[0]; 502 sf->maxleaf = ilocal[nleaves - 1]; 503 contiguous = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1); 504 } else { 505 sf->minleaf = 0; 506 sf->maxleaf = nleaves - 1; 507 unique = PETSC_TRUE; 508 contiguous = PETSC_TRUE; 509 } 510 511 if (contiguous) { 512 if (localmode == PETSC_USE_POINTER) { 513 ilocal = NULL; 514 } else { 515 PetscCall(PetscFree(ilocal)); 516 } 517 } 518 sf->mine = ilocal; 519 if (localmode == PETSC_USE_POINTER) { 520 sf->mine_alloc = NULL; 521 } else { 522 sf->mine_alloc = ilocal; 523 } 524 sf->remote = iremote; 525 if (remotemode == PETSC_USE_POINTER) { 526 sf->remote_alloc = NULL; 527 } else { 528 sf->remote_alloc = iremote; 529 } 530 PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0)); 531 sf->graphset = PETSC_TRUE; 532 PetscFunctionReturn(PETSC_SUCCESS); 533 } 534 535 /*@ 536 PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern 537 538 Collective 539 540 Input Parameters: 541 + sf - The `PetscSF` 542 . map - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`) 543 - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL` 544 545 Level: intermediate 546 547 Notes: 548 It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `x` and its `PetscLayout` is `map`. 549 `n` and `N` are the local and global sizes of `x` respectively. 
550 551 With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to 552 sequential vectors `y` on all MPI processes. 553 554 With `PETSCSF_PATTERN_GATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to a 555 sequential vector `y` on rank 0. 556 557 In above cases, entries of `x` are roots and entries of `y` are leaves. 558 559 With `PETSCSF_PATTERN_ALLTOALL`, map is insignificant. Suppose NP is size of `sf`'s communicator. The routine 560 creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 561 of rank j. Here 0 <=i,j<NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that it does 562 not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data 563 items with `MPI_Type_contiguous` and use that as the <unit> argument in SF routines. 564 565 In this case, roots and leaves are symmetric. 
566 567 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()` 568 @*/ 569 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern) 570 { 571 MPI_Comm comm; 572 PetscInt n, N, res[2]; 573 PetscMPIInt rank, size; 574 PetscSFType type; 575 576 PetscFunctionBegin; 577 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 578 if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2); 579 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 580 PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern); 581 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 582 PetscCallMPI(MPI_Comm_size(comm, &size)); 583 584 if (pattern == PETSCSF_PATTERN_ALLTOALL) { 585 type = PETSCSFALLTOALL; 586 PetscCall(PetscLayoutCreate(comm, &sf->map)); 587 PetscCall(PetscLayoutSetLocalSize(sf->map, size)); 588 PetscCall(PetscLayoutSetSize(sf->map, ((PetscInt)size) * size)); 589 PetscCall(PetscLayoutSetUp(sf->map)); 590 } else { 591 PetscCall(PetscLayoutGetLocalSize(map, &n)); 592 PetscCall(PetscLayoutGetSize(map, &N)); 593 res[0] = n; 594 res[1] = -n; 595 /* Check if n are same over all ranks so that we can optimize it */ 596 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm)); 597 if (res[0] == -res[1]) { /* same n */ 598 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 599 } else { 600 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 601 } 602 PetscCall(PetscLayoutReference(map, &sf->map)); 603 } 604 PetscCall(PetscSFSetType(sf, type)); 605 606 sf->pattern = pattern; 607 sf->mine = NULL; /* Contiguous */ 608 609 /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 610 Also set other easy stuff. 
611 */ 612 if (pattern == PETSCSF_PATTERN_ALLGATHER) { 613 sf->nleaves = N; 614 sf->nroots = n; 615 sf->nranks = size; 616 sf->minleaf = 0; 617 sf->maxleaf = N - 1; 618 } else if (pattern == PETSCSF_PATTERN_GATHER) { 619 sf->nleaves = rank ? 0 : N; 620 sf->nroots = n; 621 sf->nranks = rank ? 0 : size; 622 sf->minleaf = 0; 623 sf->maxleaf = rank ? -1 : N - 1; 624 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 625 sf->nleaves = size; 626 sf->nroots = size; 627 sf->nranks = size; 628 sf->minleaf = 0; 629 sf->maxleaf = size - 1; 630 } 631 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 632 sf->graphset = PETSC_TRUE; 633 PetscFunctionReturn(PETSC_SUCCESS); 634 } 635 636 /*@ 637 PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map 638 639 Collective 640 641 Input Parameter: 642 . sf - star forest to invert 643 644 Output Parameter: 645 . isf - inverse of `sf` 646 647 Level: advanced 648 649 Notes: 650 All roots must have degree 1. 651 652 The local space may be a permutation, but cannot be sparse. 
653 654 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetGraph()` 655 @*/ 656 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf) 657 { 658 PetscMPIInt rank; 659 PetscInt i, nroots, nleaves, maxlocal, count, *newilocal; 660 const PetscInt *ilocal; 661 PetscSFNode *roots, *leaves; 662 663 PetscFunctionBegin; 664 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 665 PetscSFCheckGraphSet(sf, 1); 666 PetscAssertPointer(isf, 2); 667 668 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL)); 669 maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */ 670 671 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 672 PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves)); 673 for (i = 0; i < maxlocal; i++) { 674 leaves[i].rank = rank; 675 leaves[i].index = i; 676 } 677 for (i = 0; i < nroots; i++) { 678 roots[i].rank = -1; 679 roots[i].index = -1; 680 } 681 PetscCall(PetscSFReduceBegin(sf, MPIU_2INT, leaves, roots, MPI_REPLACE)); 682 PetscCall(PetscSFReduceEnd(sf, MPIU_2INT, leaves, roots, MPI_REPLACE)); 683 684 /* Check whether our leaves are sparse */ 685 for (i = 0, count = 0; i < nroots; i++) 686 if (roots[i].rank >= 0) count++; 687 if (count == nroots) newilocal = NULL; 688 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). 
*/ PetscCall(PetscMalloc1(count, &newilocal)); 689 for (i = 0, count = 0; i < nroots; i++) { 690 if (roots[i].rank >= 0) { 691 newilocal[count] = i; 692 roots[count].rank = roots[i].rank; 693 roots[count].index = roots[i].index; 694 count++; 695 } 696 } 697 } 698 699 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf)); 700 PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES)); 701 PetscCall(PetscFree2(roots, leaves)); 702 PetscFunctionReturn(PETSC_SUCCESS); 703 } 704 705 /*@ 706 PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph 707 708 Collective 709 710 Input Parameters: 711 + sf - communication object to duplicate 712 - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`) 713 714 Output Parameter: 715 . newsf - new communication object 716 717 Level: beginner 718 719 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()` 720 @*/ 721 PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf) 722 { 723 PetscSFType type; 724 MPI_Datatype dtype = MPIU_SCALAR; 725 726 PetscFunctionBegin; 727 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 728 PetscValidLogicalCollectiveEnum(sf, opt, 2); 729 PetscAssertPointer(newsf, 3); 730 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf)); 731 PetscCall(PetscSFGetType(sf, &type)); 732 if (type) PetscCall(PetscSFSetType(*newsf, type)); 733 (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */ 734 if (opt == PETSCSF_DUPLICATE_GRAPH) { 735 PetscSFCheckGraphSet(sf, 1); 736 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 737 PetscInt nroots, nleaves; 738 const PetscInt *ilocal; 739 const PetscSFNode *iremote; 740 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 741 
PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 742 } else { 743 PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern)); 744 } 745 } 746 /* Since oldtype is committed, so is newtype, according to MPI */ 747 if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype)); 748 (*newsf)->vscat.bs = sf->vscat.bs; 749 (*newsf)->vscat.unit = dtype; 750 (*newsf)->vscat.to_n = sf->vscat.to_n; 751 (*newsf)->vscat.from_n = sf->vscat.from_n; 752 /* Do not copy lsf. Build it on demand since it is rarely used */ 753 754 #if defined(PETSC_HAVE_DEVICE) 755 (*newsf)->backend = sf->backend; 756 (*newsf)->unknown_input_stream = sf->unknown_input_stream; 757 (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi; 758 (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi; 759 #endif 760 PetscTryTypeMethod(sf, Duplicate, opt, *newsf); 761 /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */ 762 PetscFunctionReturn(PETSC_SUCCESS); 763 } 764 765 /*@C 766 PetscSFGetGraph - Get the graph specifying a parallel star forest 767 768 Not Collective 769 770 Input Parameter: 771 . sf - star forest 772 773 Output Parameters: 774 + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 775 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 776 . 
ilocal - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage) 777 - iremote - remote locations of root vertices for each leaf on the current process 778 779 Level: intermediate 780 781 Notes: 782 We are not currently requiring that the graph is set, thus returning `nroots` = -1 if it has not been set yet 783 784 The returned `ilocal` and `iremote` might contain values in different order than the input ones in `PetscSFSetGraph()` 785 786 Fortran Notes: 787 The returned `iremote` array is a copy and must be deallocated after use. Consequently, if you 788 want to update the graph, you must call `PetscSFSetGraph()` after modifying the `iremote` array. 789 790 To check for a `NULL` `ilocal` use 791 $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then 792 793 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()` 794 @*/ 795 PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote) 796 { 797 PetscFunctionBegin; 798 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 799 if (sf->ops->GetGraph) { 800 PetscCall((sf->ops->GetGraph)(sf, nroots, nleaves, ilocal, iremote)); 801 } else { 802 if (nroots) *nroots = sf->nroots; 803 if (nleaves) *nleaves = sf->nleaves; 804 if (ilocal) *ilocal = sf->mine; 805 if (iremote) *iremote = sf->remote; 806 } 807 PetscFunctionReturn(PETSC_SUCCESS); 808 } 809 810 /*@ 811 PetscSFGetLeafRange - Get the active leaf ranges 812 813 Not Collective 814 815 Input Parameter: 816 . sf - star forest 817 818 Output Parameters: 819 + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves. 820 - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves. 

  Level: developer

.seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  /* Both outputs are optional; the cached range was computed when the graph was set */
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database

  Collective

  Input Parameters:
+ A - the star forest
. obj - Optional object that provides the prefix for the option names
- name - command line option

  Level: intermediate

  Note:
  See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat`

.seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1);
  PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFView - view a star forest

  Collective

  Input Parameters:
+ sf - star forest
- viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`

  Level: beginner

.seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
{
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  PetscCheckSameComm(sf, 1, viewer, 2);
  /* Setting up builds the rank tables (sf->ranks, sf->roffset, ...) printed below */
  if (sf->graphset) PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt    ii, i, j;

    PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
    PetscCall(PetscViewerASCIIPushTab(viewer));
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
        PetscCall(PetscViewerASCIIPopTab(viewer));
        PetscFunctionReturn(PETSC_SUCCESS);
      }
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
      /* Synchronized printing serializes output rank-by-rank for readability */
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n", rank, sf->nroots, sf->nleaves, sf->nranks));
      /* NULL sf->mine means leaves are contiguous: leaf i lives at index i */
      for (i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerGetFormat(viewer, &format));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks, *perm;
        /* Sort a copy of the rank list so output is ordered by rank; perm maps back to the original slots */
        PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
        PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
        for (i = 0; i < sf->nranks; i++) perm[i] = i;
        PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
        for (ii = 0; ii < sf->nranks; ii++) {
          i = perm[ii];
          PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
          for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
        }
        PetscCall(PetscFree2(tmpranks, perm));
      }
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
    }
    PetscCall(PetscViewerASCIIPopTab(viewer));
  }
  /* Let the implementation print type-specific information */
  PetscTryTypeMethod(sf, View, viewer);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

  Not Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ nranks - number of ranks referenced by local part
. ranks - [`nranks`] array of ranks
. roffset - [`nranks`+1] offset in `rmine`/`rremote` for each rank
.
rmine - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote rank
- rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote rank

  Level: developer

.seealso: `PetscSF`, `PetscSFGetLeafRanks()`
@*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetRootRanks) {
    /* Implementation-provided accessor */
    PetscCall((sf->ops->GetRootRanks)(sf, nranks, ranks, roffset, rmine, rremote));
  } else {
    /* The generic implementation: return the arrays built by PetscSFSetUpRanks(); all outputs are optional */
    if (nranks) *nranks = sf->nranks;
    if (ranks) *ranks = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine) *rmine = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

  Not Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ niranks - number of leaf ranks referencing roots on this process
. iranks - [`niranks`] array of ranks
. ioffset - [`niranks`+1] offset in `irootloc` for each rank
- irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf rank

  Level: developer

.seealso: `PetscSF`, `PetscSFGetRootRanks()`
@*/
PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetLeafRanks) {
    PetscCall((sf->ops->GetLeafRanks)(sf, niranks, iranks, ioffset, irootloc));
  } else {
    /* No generic fallback: leaf-rank info requires two-sided knowledge only some types maintain */
    PetscSFType type;
    PetscCall(PetscSFGetType(sf, &type));
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Linear membership test: PETSC_TRUE iff needle appears in list[0..n-1].
   n is expected to be small (the distinguished-rank group), so O(n) is fine. */
static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
{
  PetscInt i;
  for (i = 0; i < n; i++) {
    if (needle == list[i]) return PETSC_TRUE;
  }
  return PETSC_FALSE;
}

/*@C
  PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations.

  Collective

  Input Parameters:
+ sf - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
- dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)

  Level: developer

.seealso: `PetscSF`, `PetscSFGetRootRanks()`
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
{
  PetscHMapI    table;
  PetscHashIter pos;
  PetscMPIInt   size, groupsize, *groupranks;
  PetscInt     *rcount, *ranks;
  PetscInt      i, irank = -1, orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
  /* Count, per remote rank, how many of my leaves reference it (rank -> leaf count) */
  PetscCall(PetscHMapICreateWithSize(10, &table));
  for (i = 0; i < sf->nleaves; i++) {
    /* Log 1-based rank */
    PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
  }
  PetscCall(PetscHMapIGetSize(table, &sf->nranks));
  PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
  PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
  PetscHashIterBegin(table, pos);
  for (i = 0; i < sf->nranks; i++) {
    PetscHashIterGetKey(table, pos, ranks[i]);
    PetscHashIterGetVal(table, pos, rcount[i]);
    PetscHashIterNext(table, pos);
    ranks[i]--; /* Convert back to 0-based */
  }
  PetscCall(PetscHMapIDestroy(&table));

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group    group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate every member of dgroup into the SF communicator's rank numbering */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
    PetscCall(PetscMalloc1(groupsize, &dgroupranks));
    PetscCall(PetscMalloc1(groupsize, &groupranks));
    for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
    if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree(dgroupranks));
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
    for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i], groupsize, groupranks)) break;
    }
    for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank, tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  PetscCall(PetscFree(groupranks));
  /* Sort each partition separately so distinguished ranks stay first */
  PetscCall(PetscSortIntWithArray(sf->ndranks, ranks, rcount));
  if (rcount) PetscCall(PetscSortIntWithArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
  sf->roffset[0] = 0;
  for (i = 0; i < sf->nranks; i++) {
    PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
    sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
    rcount[i]          = 0; /* reused below as a per-rank fill cursor */
  }
  for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks; try the distinguished prefix first */
      PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->ndranks, sf->ranks, &irank));
      if (irank < 0) {
        PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %" PetscInt_FMT " in array", sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  PetscCall(PetscFree2(rcount, ranks));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGetGroups - gets incoming and outgoing process groups

  Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ incoming - group of origin processes for incoming edges (leaves that reference my roots)
- outgoing - group of destination processes for outgoing edges (roots that I reference)

  Level: developer

.seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
{
  MPI_Group group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
  /* Groups are built lazily and cached on the SF; ownership stays with the SF */
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt        i;
    const PetscInt *indegree;
    PetscMPIInt     rank, *outranks, *inranks;
    PetscSFNode    *remote;
    PetscSF         bgcount;

    /* Compute the number of incoming ranks */
    PetscCall(PetscMalloc1(sf->nranks, &remote));
    for (i = 0; i < sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
    PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
    PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
    PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
    /* Enumerate the incoming ranks */
    PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
    for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
    PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
    PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_incl(group, indegree[0], inranks, &sf->ingroup));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree2(inranks, outranks));
    PetscCall(PetscSFDestroy(&bgcount));
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* The outgoing group is just the ranks my leaves reference, already known locally */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
    PetscCallMPI(MPI_Group_free(&group));
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters

  Collective

  Input Parameter:
. sf - star forest that may contain roots with 0 or with more than 1 vertex

  Output Parameter:
. multi - star forest with split roots, such that each root has degree exactly 1

  Level: developer

  Note:
  In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
  directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
  edge, it is a candidate for future optimization that might involve its removal.
1190 1191 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()` 1192 @*/ 1193 PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi) 1194 { 1195 PetscFunctionBegin; 1196 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1197 PetscAssertPointer(multi, 2); 1198 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */ 1199 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi)); 1200 *multi = sf->multi; 1201 sf->multi->multi = sf->multi; 1202 PetscFunctionReturn(PETSC_SUCCESS); 1203 } 1204 if (!sf->multi) { 1205 const PetscInt *indegree; 1206 PetscInt i, *inoffset, *outones, *outoffset, maxlocal; 1207 PetscSFNode *remote; 1208 maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */ 1209 PetscCall(PetscSFComputeDegreeBegin(sf, &indegree)); 1210 PetscCall(PetscSFComputeDegreeEnd(sf, &indegree)); 1211 PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset)); 1212 inoffset[0] = 0; 1213 for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i]; 1214 for (i = 0; i < maxlocal; i++) outones[i] = 1; 1215 PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM)); 1216 PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM)); 1217 for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1218 if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */ 1219 for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp"); 1220 } 1221 PetscCall(PetscMalloc1(sf->nleaves, &remote)); 1222 for (i = 0; i < sf->nleaves; i++) { 1223 remote[i].rank = sf->remote[i].rank; 1224 remote[i].index = outoffset[sf->mine ? 
sf->mine[i] : i]; 1225 } 1226 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi)); 1227 sf->multi->multi = sf->multi; 1228 PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER)); 1229 if (sf->rankorder) { /* Sort the ranks */ 1230 PetscMPIInt rank; 1231 PetscInt *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree; 1232 PetscSFNode *newremote; 1233 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 1234 for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]); 1235 PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset)); 1236 for (i = 0; i < maxlocal; i++) outranks[i] = rank; 1237 PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE)); 1238 PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE)); 1239 /* Sort the incoming ranks at each vertex, build the inverse map */ 1240 for (i = 0; i < sf->nroots; i++) { 1241 PetscInt j; 1242 for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j; 1243 if (inranks) PetscCall(PetscSortIntWithArray(indegree[i], inranks + inoffset[i], tmpoffset)); 1244 for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1245 } 1246 PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE)); 1247 PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE)); 1248 PetscCall(PetscMalloc1(sf->nleaves, &newremote)); 1249 for (i = 0; i < sf->nleaves; i++) { 1250 newremote[i].rank = sf->remote[i].rank; 1251 newremote[i].index = newoutoffset[sf->mine ? 
sf->mine[i] : i]; 1252 } 1253 PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER)); 1254 PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset)); 1255 } 1256 PetscCall(PetscFree3(inoffset, outones, outoffset)); 1257 } 1258 *multi = sf->multi; 1259 PetscFunctionReturn(PETSC_SUCCESS); 1260 } 1261 1262 /*@C 1263 PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices 1264 1265 Collective 1266 1267 Input Parameters: 1268 + sf - original star forest 1269 . nselected - number of selected roots on this process 1270 - selected - indices of the selected roots on this process 1271 1272 Output Parameter: 1273 . esf - new star forest 1274 1275 Level: advanced 1276 1277 Note: 1278 To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can 1279 be done by calling PetscSFGetGraph(). 1280 1281 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1282 @*/ 1283 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf) 1284 { 1285 PetscInt i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal; 1286 const PetscInt *ilocal; 1287 signed char *rootdata, *leafdata, *leafmem; 1288 const PetscSFNode *iremote; 1289 PetscSFNode *new_iremote; 1290 MPI_Comm comm; 1291 1292 PetscFunctionBegin; 1293 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1294 PetscSFCheckGraphSet(sf, 1); 1295 if (nselected) PetscAssertPointer(selected, 3); 1296 PetscAssertPointer(esf, 4); 1297 1298 PetscCall(PetscSFSetUp(sf)); 1299 PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0)); 1300 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1301 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 1302 1303 if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range 
indices */ 1304 PetscBool dups; 1305 PetscCall(PetscCheckDupsInt(nselected, selected, &dups)); 1306 PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups"); 1307 for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots); 1308 } 1309 1310 if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf); 1311 else { 1312 /* A generic version of creating embedded sf */ 1313 PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf)); 1314 maxlocal = maxleaf - minleaf + 1; 1315 PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem)); 1316 leafdata = leafmem - minleaf; 1317 /* Tag selected roots and bcast to leaves */ 1318 for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1; 1319 PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1320 PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1321 1322 /* Build esf with leaves that are still connected */ 1323 esf_nleaves = 0; 1324 for (i = 0; i < nleaves; i++) { 1325 j = ilocal ? ilocal[i] : i; 1326 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1327 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1328 */ 1329 esf_nleaves += (leafdata[j] ? 1 : 0); 1330 } 1331 PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal)); 1332 PetscCall(PetscMalloc1(esf_nleaves, &new_iremote)); 1333 for (i = n = 0; i < nleaves; i++) { 1334 j = ilocal ? 
ilocal[i] : i; 1335 if (leafdata[j]) { 1336 new_ilocal[n] = j; 1337 new_iremote[n].rank = iremote[i].rank; 1338 new_iremote[n].index = iremote[i].index; 1339 ++n; 1340 } 1341 } 1342 PetscCall(PetscSFCreate(comm, esf)); 1343 PetscCall(PetscSFSetFromOptions(*esf)); 1344 PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1345 PetscCall(PetscFree2(rootdata, leafmem)); 1346 } 1347 PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0)); 1348 PetscFunctionReturn(PETSC_SUCCESS); 1349 } 1350 1351 /*@C 1352 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices 1353 1354 Collective 1355 1356 Input Parameters: 1357 + sf - original star forest 1358 . nselected - number of selected leaves on this process 1359 - selected - indices of the selected leaves on this process 1360 1361 Output Parameter: 1362 . newsf - new star forest 1363 1364 Level: advanced 1365 1366 .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1367 @*/ 1368 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf) 1369 { 1370 const PetscSFNode *iremote; 1371 PetscSFNode *new_iremote; 1372 const PetscInt *ilocal; 1373 PetscInt i, nroots, *leaves, *new_ilocal; 1374 MPI_Comm comm; 1375 1376 PetscFunctionBegin; 1377 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1378 PetscSFCheckGraphSet(sf, 1); 1379 if (nselected) PetscAssertPointer(selected, 3); 1380 PetscAssertPointer(newsf, 4); 1381 1382 /* Uniq selected[] and put results in leaves[] */ 1383 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1384 PetscCall(PetscMalloc1(nselected, &leaves)); 1385 PetscCall(PetscArraycpy(leaves, selected, nselected)); 1386 PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves)); 1387 PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, 
PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves); 1388 1389 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1390 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf); 1391 else { 1392 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote)); 1393 PetscCall(PetscMalloc1(nselected, &new_ilocal)); 1394 PetscCall(PetscMalloc1(nselected, &new_iremote)); 1395 for (i = 0; i < nselected; ++i) { 1396 const PetscInt l = leaves[i]; 1397 new_ilocal[i] = ilocal ? ilocal[l] : l; 1398 new_iremote[i].rank = iremote[l].rank; 1399 new_iremote[i].index = iremote[l].index; 1400 } 1401 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf)); 1402 PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1403 } 1404 PetscCall(PetscFree(leaves)); 1405 PetscFunctionReturn(PETSC_SUCCESS); 1406 } 1407 1408 /*@C 1409 PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()` 1410 1411 Collective 1412 1413 Input Parameters: 1414 + sf - star forest on which to communicate 1415 . unit - data type associated with each node 1416 . rootdata - buffer to broadcast 1417 - op - operation to use for reduction 1418 1419 Output Parameter: 1420 . leafdata - buffer to be reduced with values from each leaf's respective root 1421 1422 Level: intermediate 1423 1424 Note: 1425 When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1426 are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should 1427 use `PetscSFBcastWithMemTypeBegin()` instead. 
1428 1429 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()` 1430 @*/ 1431 PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1432 { 1433 PetscMemType rootmtype, leafmtype; 1434 1435 PetscFunctionBegin; 1436 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1437 PetscCall(PetscSFSetUp(sf)); 1438 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1439 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1440 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1441 PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 1442 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1443 PetscFunctionReturn(PETSC_SUCCESS); 1444 } 1445 1446 /*@C 1447 PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call 1448 to `PetscSFBcastEnd()` 1449 1450 Collective 1451 1452 Input Parameters: 1453 + sf - star forest on which to communicate 1454 . unit - data type associated with each node 1455 . rootmtype - memory type of rootdata 1456 . rootdata - buffer to broadcast 1457 . leafmtype - memory type of leafdata 1458 - op - operation to use for reduction 1459 1460 Output Parameter: 1461 . 
leafdata - buffer to be reduced with values from each leaf's respective root 1462 1463 Level: intermediate 1464 1465 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()` 1466 @*/ 1467 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) 1468 { 1469 PetscFunctionBegin; 1470 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1471 PetscCall(PetscSFSetUp(sf)); 1472 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1473 PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 1474 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1475 PetscFunctionReturn(PETSC_SUCCESS); 1476 } 1477 1478 /*@C 1479 PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()` 1480 1481 Collective 1482 1483 Input Parameters: 1484 + sf - star forest 1485 . unit - data type 1486 . rootdata - buffer to broadcast 1487 - op - operation to use for reduction 1488 1489 Output Parameter: 1490 . 
leafdata - buffer to be reduced with values from each leaf's respective root 1491 1492 Level: intermediate 1493 1494 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()` 1495 @*/ 1496 PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1497 { 1498 PetscFunctionBegin; 1499 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1500 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1501 PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op); 1502 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1503 PetscFunctionReturn(PETSC_SUCCESS); 1504 } 1505 1506 /*@C 1507 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()` 1508 1509 Collective 1510 1511 Input Parameters: 1512 + sf - star forest 1513 . unit - data type 1514 . leafdata - values to reduce 1515 - op - reduction operation 1516 1517 Output Parameter: 1518 . rootdata - result of reduction of values from all leaves of each root 1519 1520 Level: intermediate 1521 1522 Note: 1523 When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1524 are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should 1525 use `PetscSFReduceWithMemTypeBegin()` instead. 
1526 1527 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()` 1528 @*/ 1529 PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1530 { 1531 PetscMemType rootmtype, leafmtype; 1532 1533 PetscFunctionBegin; 1534 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1535 PetscCall(PetscSFSetUp(sf)); 1536 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1537 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1538 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1539 PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1540 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1541 PetscFunctionReturn(PETSC_SUCCESS); 1542 } 1543 1544 /*@C 1545 PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()` 1546 1547 Collective 1548 1549 Input Parameters: 1550 + sf - star forest 1551 . unit - data type 1552 . leafmtype - memory type of leafdata 1553 . leafdata - values to reduce 1554 . rootmtype - memory type of rootdata 1555 - op - reduction operation 1556 1557 Output Parameter: 1558 . 
rootdata - result of reduction of values from all leaves of each root 1559 1560 Level: intermediate 1561 1562 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()` 1563 @*/ 1564 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) 1565 { 1566 PetscFunctionBegin; 1567 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1568 PetscCall(PetscSFSetUp(sf)); 1569 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1570 PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1571 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1572 PetscFunctionReturn(PETSC_SUCCESS); 1573 } 1574 1575 /*@C 1576 PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()` 1577 1578 Collective 1579 1580 Input Parameters: 1581 + sf - star forest 1582 . unit - data type 1583 . leafdata - values to reduce 1584 - op - reduction operation 1585 1586 Output Parameter: 1587 . 
rootdata - result of reduction of values from all leaves of each root 1588 1589 Level: intermediate 1590 1591 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()` 1592 @*/ 1593 PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1594 { 1595 PetscFunctionBegin; 1596 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1597 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1598 PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op); 1599 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1600 PetscFunctionReturn(PETSC_SUCCESS); 1601 } 1602 1603 /*@C 1604 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, 1605 to be completed with `PetscSFFetchAndOpEnd()` 1606 1607 Collective 1608 1609 Input Parameters: 1610 + sf - star forest 1611 . unit - data type 1612 . leafdata - leaf values to use in reduction 1613 - op - operation to use for reduction 1614 1615 Output Parameters: 1616 + rootdata - root values to be updated, input state is seen by first process to perform an update 1617 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1618 1619 Level: advanced 1620 1621 Note: 1622 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1623 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1624 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1625 integers. 
1626 1627 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()` 1628 @*/ 1629 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1630 { 1631 PetscMemType rootmtype, leafmtype, leafupdatemtype; 1632 1633 PetscFunctionBegin; 1634 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1635 PetscCall(PetscSFSetUp(sf)); 1636 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1637 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1638 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1639 PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype)); 1640 PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1641 PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 1642 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1643 PetscFunctionReturn(PETSC_SUCCESS); 1644 } 1645 1646 /*@C 1647 PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by 1648 applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()` 1649 1650 Collective 1651 1652 Input Parameters: 1653 + sf - star forest 1654 . unit - data type 1655 . rootmtype - memory type of rootdata 1656 . leafmtype - memory type of leafdata 1657 . leafdata - leaf values to use in reduction 1658 . leafupdatemtype - memory type of leafupdate 1659 - op - operation to use for reduction 1660 1661 Output Parameters: 1662 + rootdata - root values to be updated, input state is seen by first process to perform an update 1663 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1664 1665 Level: advanced 1666 1667 Note: 1668 See `PetscSFFetchAndOpBegin()` for more details. 
1669 1670 .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()` 1671 @*/ 1672 PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op) 1673 { 1674 PetscFunctionBegin; 1675 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1676 PetscCall(PetscSFSetUp(sf)); 1677 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1678 PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1679 PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 1680 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1681 PetscFunctionReturn(PETSC_SUCCESS); 1682 } 1683 1684 /*@C 1685 PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()` 1686 to fetch values from roots and update atomically by applying operation using my leaf value 1687 1688 Collective 1689 1690 Input Parameters: 1691 + sf - star forest 1692 . unit - data type 1693 . 
leafdata - leaf values to use in reduction 1694 - op - operation to use for reduction 1695 1696 Output Parameters: 1697 + rootdata - root values to be updated, input state is seen by first process to perform an update 1698 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1699 1700 Level: advanced 1701 1702 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()` 1703 @*/ 1704 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1705 { 1706 PetscFunctionBegin; 1707 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1708 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0)); 1709 PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op); 1710 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0)); 1711 PetscFunctionReturn(PETSC_SUCCESS); 1712 } 1713 1714 /*@C 1715 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()` 1716 1717 Collective 1718 1719 Input Parameter: 1720 . sf - star forest 1721 1722 Output Parameter: 1723 . degree - degree of each root vertex 1724 1725 Level: advanced 1726 1727 Note: 1728 The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it. 

.seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt **degree)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscAssertPointer(degree, 2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf - sf->minleaf + 1;
    PetscCall(PetscMalloc1(nroots, &sf->degree));
    PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i = 0; i < nroots; i++) sf->degree[i] = 0;
    for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
    /* Each leaf contributes 1; summing over the graph gives each root's degree. degreetmp is shifted
       by -minleaf so it can be indexed directly with leaf indices in [minleaf, maxleaf]. */
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
  }
  /* Result is not available until PetscSFComputeDegreeEnd() completes the reduction */
  *degree = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`

  Collective

  Input Parameter:
. sf - star forest

  Output Parameter:
. degree - degree of each root vertex

  Level: developer

  Note:
  The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.

.seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscAssertPointer(degree, 2);
  if (!sf->degreeknown) {
    PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    /* Same shifted buffer as in PetscSFComputeDegreeBegin(); the temporary is freed once the sum lands in sf->degree */
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
    PetscCall(PetscFree(sf->degreetmp));
    sf->degreeknown = PETSC_TRUE; /* cache so later Begin/End pairs return sf->degree without recomputation */
  }
  *degree = sf->degree;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`).
  Each multi-root is assigned index of the corresponding original root.

  Collective

  Input Parameters:
+ sf     - star forest
- degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()`

  Output Parameters:
+ nMultiRoots             - (optional) number of multi-roots (roots of multi-`PetscSF`)
- multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots`

  Level: developer

  Note:
  The returned array `multiRootsOrigNumbering` is newly allocated and should be destroyed with `PetscFree()` when no longer needed.

.seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
@*/
PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
{
  PetscSF  msf;
  PetscInt i, j, k, nroots, nmroots;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  if (nroots) PetscAssertPointer(degree, 2);
  if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3);
  PetscAssertPointer(multiRootsOrigNumbering, 4);
  PetscCall(PetscSFGetMultiSF(sf, &msf));
  PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
  PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
  /* Root i of degree d occupies d consecutive multi-root slots, each labeled with i */
  for (i = 0, j = 0, k = 0; i < nroots; i++) {
    if (!degree[i]) continue;
    for (j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
  }
  /* The degrees must account for every multi-root exactly once */
  PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail");
  if (nMultiRoots) *nMultiRoots = nmroots;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`

  Collective

  Input Parameters:
+ sf       - star forest
. unit     - data type
- leafdata - leaf data to gather to roots

  Output Parameter:
.
multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1842 1843 Level: intermediate 1844 1845 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()` 1846 @*/ 1847 PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) 1848 { 1849 PetscSF multi = NULL; 1850 1851 PetscFunctionBegin; 1852 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1853 PetscCall(PetscSFSetUp(sf)); 1854 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1855 PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 1856 PetscFunctionReturn(PETSC_SUCCESS); 1857 } 1858 1859 /*@C 1860 PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()` 1861 1862 Collective 1863 1864 Input Parameters: 1865 + sf - star forest 1866 . unit - data type 1867 - leafdata - leaf data to gather to roots 1868 1869 Output Parameter: 1870 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1871 1872 Level: intermediate 1873 1874 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()` 1875 @*/ 1876 PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) 1877 { 1878 PetscSF multi = NULL; 1879 1880 PetscFunctionBegin; 1881 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1882 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1883 PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 1884 PetscFunctionReturn(PETSC_SUCCESS); 1885 } 1886 1887 /*@C 1888 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()` 1889 1890 Collective 1891 1892 Input Parameters: 1893 + sf - star forest 1894 . unit - data type 1895 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1896 1897 Output Parameter: 1898 . 
leafdata - leaf data to be update with personal data from each respective root 1899 1900 Level: intermediate 1901 1902 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()` 1903 @*/ 1904 PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) 1905 { 1906 PetscSF multi = NULL; 1907 1908 PetscFunctionBegin; 1909 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1910 PetscCall(PetscSFSetUp(sf)); 1911 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1912 PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE)); 1913 PetscFunctionReturn(PETSC_SUCCESS); 1914 } 1915 1916 /*@C 1917 PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()` 1918 1919 Collective 1920 1921 Input Parameters: 1922 + sf - star forest 1923 . unit - data type 1924 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1925 1926 Output Parameter: 1927 . leafdata - leaf data to be update with personal data from each respective root 1928 1929 Level: intermediate 1930 1931 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()` 1932 @*/ 1933 PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) 1934 { 1935 PetscSF multi = NULL; 1936 1937 PetscFunctionBegin; 1938 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1939 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1940 PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE)); 1941 PetscFunctionReturn(PETSC_SUCCESS); 1942 } 1943 1944 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 1945 { 1946 PetscInt i, n, nleaves; 1947 const PetscInt *ilocal = NULL; 1948 PetscHSetI seen; 1949 1950 PetscFunctionBegin; 1951 if (PetscDefined(USE_DEBUG)) { 1952 PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL)); 1953 PetscCall(PetscHSetICreate(&seen)); 1954 for (i = 0; i < nleaves; i++) { 1955 const PetscInt 
leaf = ilocal ? ilocal[i] : i; 1956 PetscCall(PetscHSetIAdd(seen, leaf)); 1957 } 1958 PetscCall(PetscHSetIGetSize(seen, &n)); 1959 PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique"); 1960 PetscCall(PetscHSetIDestroy(&seen)); 1961 } 1962 PetscFunctionReturn(PETSC_SUCCESS); 1963 } 1964 1965 /*@ 1966 PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view 1967 1968 Input Parameters: 1969 + sfA - The first `PetscSF` 1970 - sfB - The second `PetscSF` 1971 1972 Output Parameter: 1973 . sfBA - The composite `PetscSF` 1974 1975 Level: developer 1976 1977 Notes: 1978 Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star 1979 forests, i.e. the same leaf is not connected with different roots. 1980 1981 `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds 1982 a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected 1983 nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a 1984 `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfB`, on connected nodes. 

.seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA, *remotePointsB;
  PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
  const PetscInt    *localPointsA, *localPointsB;
  PetscInt          *localPointsBA;
  PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
  PetscBool          denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sfA, 1);
  PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
  PetscSFCheckGraphSet(sfB, 2);
  PetscCheckSameComm(sfA, 1, sfB, 2);
  PetscAssertPointer(sfBA, 3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
  /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size
     numRootsB; otherwise, garbage will be broadcasted.
     Example (comm size = 1):
     sfA: 0 <- (0, 0)
     sfB: 100 <- (0, 0)
          101 <- (0, 1)
     Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target
     of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would
     receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on
     remotePointsA; if not recast, point 101 would receive a garbage value. */
  PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
  /* (-1, -1) marks "no connection"; entries not overwritten below stay disconnected */
  for (i = 0; i < numRootsB; i++) {
    reorderedRemotePointsA[i].rank  = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  /* Place sfA's remote roots at the slots of sfB's root space matching sfA's leaf indices */
  for (i = 0; i < numLeavesA; i++) {
    PetscInt localp = localPointsA ? localPointsA[i] : i;

    if (localp >= numRootsB) continue; /* leaves of sfA outside sfB's root space cannot be on a path */
    reorderedRemotePointsA[localp] = remotePointsA[i];
  }
  remotePointsA = reorderedRemotePointsA;
  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
  /* Initialize to (-1, -1) so leaves of sfB that receive nothing are recognizable after the broadcast */
  for (i = 0; i < maxleaf - minleaf + 1; i++) {
    leafdataB[i].rank  = -1;
    leafdataB[i].index = -1;
  }
  /* Push sfA's root locations down through sfB; leafdataB is shifted so it is indexable by leaf index */
  PetscCall(PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE));
  PetscCall(PetscFree(reorderedRemotePointsA));

  /* If sfB's leaves are contiguous and every leaf got a connection, the result can keep ilocal = NULL */
  denseB = (PetscBool)!localPointsB;
  for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership of leafdataB passes to the new SF below */
  } else {
    /* Compress out unconnected leaves */
    PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
    PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
    for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l - minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
      localPointsBA[numLeavesBA]  = l;
      numLeavesBA++;
    }
    PetscCall(PetscFree(leafdataB));
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one

  Input Parameters:
+ sfA - The first `PetscSF`
- sfB - The second `PetscSF`

  Output Parameter:
. sfBA - The composite `PetscSF`.

  Level: developer

  Notes:
  Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
  second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected.

  `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds
  a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected
  roots are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()`
  on `sfA`, then
  a `PetscSFReduceBegin()`/`PetscSFReduceEnd()` on `sfB`, on connected roots.

.seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA, *remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA, *localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
  MPI_Op             op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sfA, 1);
  PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
  PetscSFCheckGraphSet(sfB, 2);
  PetscCheckSameComm(sfA, 1, sfB, 2);
  PetscAssertPointer(sfBA, 3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
  */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
  if (iswin) op = MPI_REPLACE;
#endif

  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
  /* (-1, -1) marks "no connection"; a valid rank (>= 0) always wins under MPI_MAXLOC */
  for (i = 0; i < maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank  = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  /* Place sfA's remote roots into sfB's leaf index space (array shifted by -minleaf);
     sfA leaves outside [minleaf, maxleaf] have no matching sfB leaf and are skipped */
  if (localPointsA) {
    for (i = 0; i < numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    for (i = 0; i < numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
  PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
  for (i = 0; i < numRootsB; i++) {
    remotePointsBA[i].rank  = -1;
    remotePointsBA[i].index = -1;
  }

  /* Reduce sfA's root locations up through sfB to its roots; MPIU_2INT matches the (rank, index) pair */
  PetscCall(PetscSFReduceBegin(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op));
  PetscCall(PetscSFReduceEnd(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op));
  PetscCall(PetscFree(reorderedRemotePointsA));
  /* Compress out roots of sfB that received no connection (rank still -1) */
  for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA]        = i;
    numLeavesBA++;
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`

  Input Parameter:
. sf - The global `PetscSF`

  Output Parameter:
. out - The local `PetscSF`

.seealso: `PetscSF`, `PetscSFCreate()`
*/
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
{
  MPI_Comm           comm;
  PetscMPIInt        myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
  PetscSFNode       *liremote;
  PetscSF            lsf;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
  else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
    PetscCallMPI(MPI_Comm_rank(comm, &myrank));

    /* Find out local edges and build a local SF */
    PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
    /* First pass: count edges whose remote root lives on this rank */
    for (i = lnleaves = 0; i < nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) lnleaves++;
    }
    PetscCall(PetscMalloc1(lnleaves, &lilocal));
    PetscCall(PetscMalloc1(lnleaves, &liremote));

    /* Second pass: copy those edges, remapping the owner to rank 0 of PETSC_COMM_SELF */
    for (i = j = 0; i < nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
    PetscCall(PetscSFSetFromOptions(lsf));
    PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(lsf));
    *out = lsf;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
{
  PetscMemType rootmtype, leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  /* Logged under the regular BcastBegin event; there is no separate event for the to-zero variant */
  PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscCall(PetscGetMemType(rootdata, &rootmtype));
  PetscCall(PetscGetMemType(leafdata, &leafmtype));
  PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
  PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFConcatenate - concatenate multiple `PetscSF` into one

  Input Parameters:
+ comm        - the communicator
. nsfs        - the number of input `PetscSF`
. sfs         - the array of input `PetscSF`
. rootMode    - the root mode specifying how roots are handled
- leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage

  Output Parameter:
. newsf - The resulting `PetscSF`

  Level: advanced

  Notes:
  The communicator of all `PetscSF`s in `sfs` must be comm.

  Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order.
2261 2262 The offsets in `leafOffsets` are added to the original leaf indices. 2263 2264 If all input SFs use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well. 2265 In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`. 2266 2267 If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s. 2268 In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs). 2269 2270 All root modes retain the essential connectivity condition. 2271 If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`. 2272 Parameter `rootMode` controls how the input root spaces are combined. 2273 For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode) 2274 and is also the same in the output `PetscSF`. 2275 For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined. 2276 `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally; 2277 roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input `PetscSF` and original local index, and renumbered contiguously. 2278 `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally; 2279 roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously; 2280 the original root ranks are ignored. 2281 For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, 2282 the output `PetscSF`'s root layout is such that the local number of roots is a sum of the input `PetscSF`'s local numbers of roots on each rank 2283 to keep the load balancing. 
2284 However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different ranks. 2285 2286 Example: 2287 We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running 2288 .vb 2289 make -C $PETSC_DIR/src/vec/is/sf/tests ex18 2290 for m in {local,global,shared}; do 2291 mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view 2292 done 2293 .ve 2294 we generate two identical `PetscSF`s sf_0 and sf_1, 2295 .vb 2296 PetscSF Object: sf_0 2 MPI processes 2297 type: basic 2298 rank #leaves #roots 2299 [ 0] 4 2 2300 [ 1] 4 2 2301 leaves roots roots in global numbering 2302 ( 0, 0) <- ( 0, 0) = 0 2303 ( 0, 1) <- ( 0, 1) = 1 2304 ( 0, 2) <- ( 1, 0) = 2 2305 ( 0, 3) <- ( 1, 1) = 3 2306 ( 1, 0) <- ( 0, 0) = 0 2307 ( 1, 1) <- ( 0, 1) = 1 2308 ( 1, 2) <- ( 1, 0) = 2 2309 ( 1, 3) <- ( 1, 1) = 3 2310 .ve 2311 and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\: 2312 .vb 2313 rootMode = local: 2314 PetscSF Object: result_sf 2 MPI processes 2315 type: basic 2316 rank #leaves #roots 2317 [ 0] 8 4 2318 [ 1] 8 4 2319 leaves roots roots in global numbering 2320 ( 0, 0) <- ( 0, 0) = 0 2321 ( 0, 1) <- ( 0, 1) = 1 2322 ( 0, 2) <- ( 1, 0) = 4 2323 ( 0, 3) <- ( 1, 1) = 5 2324 ( 0, 4) <- ( 0, 2) = 2 2325 ( 0, 5) <- ( 0, 3) = 3 2326 ( 0, 6) <- ( 1, 2) = 6 2327 ( 0, 7) <- ( 1, 3) = 7 2328 ( 1, 0) <- ( 0, 0) = 0 2329 ( 1, 1) <- ( 0, 1) = 1 2330 ( 1, 2) <- ( 1, 0) = 4 2331 ( 1, 3) <- ( 1, 1) = 5 2332 ( 1, 4) <- ( 0, 2) = 2 2333 ( 1, 5) <- ( 0, 3) = 3 2334 ( 1, 6) <- ( 1, 2) = 6 2335 ( 1, 7) <- ( 1, 3) = 7 2336 2337 rootMode = global: 2338 PetscSF Object: result_sf 2 MPI processes 2339 type: basic 2340 rank #leaves #roots 2341 [ 0] 8 4 2342 [ 1] 8 4 2343 leaves roots roots in global numbering 2344 ( 0, 0) <- ( 0, 0) = 0 2345 ( 0, 1) <- ( 0, 1) = 1 2346 ( 0, 2) <- ( 0, 2) = 2 2347 ( 0, 3) <- ( 0, 3) = 3 2348 ( 0, 4) <- ( 1, 0) = 4 2349 ( 0, 5) <- ( 1, 1) = 5 2350 ( 0, 6) <- ( 1, 2) = 6 
2351 ( 0, 7) <- ( 1, 3) = 7 2352 ( 1, 0) <- ( 0, 0) = 0 2353 ( 1, 1) <- ( 0, 1) = 1 2354 ( 1, 2) <- ( 0, 2) = 2 2355 ( 1, 3) <- ( 0, 3) = 3 2356 ( 1, 4) <- ( 1, 0) = 4 2357 ( 1, 5) <- ( 1, 1) = 5 2358 ( 1, 6) <- ( 1, 2) = 6 2359 ( 1, 7) <- ( 1, 3) = 7 2360 2361 rootMode = shared: 2362 PetscSF Object: result_sf 2 MPI processes 2363 type: basic 2364 rank #leaves #roots 2365 [ 0] 8 2 2366 [ 1] 8 2 2367 leaves roots roots in global numbering 2368 ( 0, 0) <- ( 0, 0) = 0 2369 ( 0, 1) <- ( 0, 1) = 1 2370 ( 0, 2) <- ( 1, 0) = 2 2371 ( 0, 3) <- ( 1, 1) = 3 2372 ( 0, 4) <- ( 0, 0) = 0 2373 ( 0, 5) <- ( 0, 1) = 1 2374 ( 0, 6) <- ( 1, 0) = 2 2375 ( 0, 7) <- ( 1, 1) = 3 2376 ( 1, 0) <- ( 0, 0) = 0 2377 ( 1, 1) <- ( 0, 1) = 1 2378 ( 1, 2) <- ( 1, 0) = 2 2379 ( 1, 3) <- ( 1, 1) = 3 2380 ( 1, 4) <- ( 0, 0) = 0 2381 ( 1, 5) <- ( 0, 1) = 1 2382 ( 1, 6) <- ( 1, 0) = 2 2383 ( 1, 7) <- ( 1, 1) = 3 2384 .ve 2385 2386 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode` 2387 @*/ 2388 PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf) 2389 { 2390 PetscInt i, s, nLeaves, nRoots; 2391 PetscInt *leafArrayOffsets; 2392 PetscInt *ilocal_new; 2393 PetscSFNode *iremote_new; 2394 PetscBool all_ilocal_null = PETSC_FALSE; 2395 PetscLayout glayout = NULL; 2396 PetscInt *gremote = NULL; 2397 PetscMPIInt rank, size; 2398 2399 PetscFunctionBegin; 2400 if (PetscDefined(USE_DEBUG)) { 2401 PetscSF dummy; /* just to have a PetscObject on comm for input validation */ 2402 2403 PetscCall(PetscSFCreate(comm, &dummy)); 2404 PetscValidLogicalCollectiveInt(dummy, nsfs, 2); 2405 PetscAssertPointer(sfs, 3); 2406 for (i = 0; i < nsfs; i++) { 2407 PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3); 2408 PetscCheckSameComm(dummy, 1, sfs[i], 3); 2409 } 2410 PetscValidLogicalCollectiveEnum(dummy, rootMode, 4); 2411 if (leafOffsets) 
PetscAssertPointer(leafOffsets, 5); 2412 PetscAssertPointer(newsf, 6); 2413 PetscCall(PetscSFDestroy(&dummy)); 2414 } 2415 if (!nsfs) { 2416 PetscCall(PetscSFCreate(comm, newsf)); 2417 PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 2418 PetscFunctionReturn(PETSC_SUCCESS); 2419 } 2420 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2421 PetscCallMPI(MPI_Comm_size(comm, &size)); 2422 2423 /* Calculate leaf array offsets */ 2424 PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets)); 2425 leafArrayOffsets[0] = 0; 2426 for (s = 0; s < nsfs; s++) { 2427 PetscInt nl; 2428 2429 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL)); 2430 leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl; 2431 } 2432 nLeaves = leafArrayOffsets[nsfs]; 2433 2434 /* Calculate number of roots */ 2435 switch (rootMode) { 2436 case PETSCSF_CONCATENATE_ROOTMODE_SHARED: { 2437 PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL)); 2438 if (PetscDefined(USE_DEBUG)) { 2439 for (s = 1; s < nsfs; s++) { 2440 PetscInt nr; 2441 2442 PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL)); 2443 PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots); 2444 } 2445 } 2446 } break; 2447 case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: { 2448 /* Calculate also global layout in this case */ 2449 PetscInt *nls; 2450 PetscLayout *lts; 2451 PetscInt **inds; 2452 PetscInt j; 2453 PetscInt rootOffset = 0; 2454 2455 PetscCall(PetscCalloc3(nsfs, <s, nsfs, &nls, nsfs, &inds)); 2456 PetscCall(PetscLayoutCreate(comm, &glayout)); 2457 glayout->bs = 1; 2458 glayout->n = 0; 2459 glayout->N = 0; 2460 for (s = 0; s < nsfs; s++) { 2461 PetscCall(PetscSFGetGraphLayout(sfs[s], <s[s], &nls[s], NULL, &inds[s])); 2462 glayout->n += lts[s]->n; 2463 glayout->N += lts[s]->N; 2464 } 2465 
PetscCall(PetscLayoutSetUp(glayout)); 2466 PetscCall(PetscMalloc1(nLeaves, &gremote)); 2467 for (s = 0, j = 0; s < nsfs; s++) { 2468 for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset; 2469 rootOffset += lts[s]->N; 2470 PetscCall(PetscLayoutDestroy(<s[s])); 2471 PetscCall(PetscFree(inds[s])); 2472 } 2473 PetscCall(PetscFree3(lts, nls, inds)); 2474 nRoots = glayout->N; 2475 } break; 2476 case PETSCSF_CONCATENATE_ROOTMODE_LOCAL: 2477 /* nRoots calculated later in this case */ 2478 break; 2479 default: 2480 SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode); 2481 } 2482 2483 if (!leafOffsets) { 2484 all_ilocal_null = PETSC_TRUE; 2485 for (s = 0; s < nsfs; s++) { 2486 const PetscInt *ilocal; 2487 2488 PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL)); 2489 if (ilocal) { 2490 all_ilocal_null = PETSC_FALSE; 2491 break; 2492 } 2493 } 2494 PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL"); 2495 } 2496 2497 /* Renumber and concatenate local leaves */ 2498 ilocal_new = NULL; 2499 if (!all_ilocal_null) { 2500 PetscCall(PetscMalloc1(nLeaves, &ilocal_new)); 2501 for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1; 2502 for (s = 0; s < nsfs; s++) { 2503 const PetscInt *ilocal; 2504 PetscInt *ilocal_l = &ilocal_new[leafArrayOffsets[s]]; 2505 PetscInt i, nleaves_l; 2506 2507 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL)); 2508 for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? 
ilocal[i] : i) + leafOffsets[s]; 2509 } 2510 } 2511 2512 /* Renumber and concatenate remote roots */ 2513 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) { 2514 PetscInt rootOffset = 0; 2515 2516 PetscCall(PetscMalloc1(nLeaves, &iremote_new)); 2517 for (i = 0; i < nLeaves; i++) { 2518 iremote_new[i].rank = -1; 2519 iremote_new[i].index = -1; 2520 } 2521 for (s = 0; s < nsfs; s++) { 2522 PetscInt i, nl, nr; 2523 PetscSF tmp_sf; 2524 const PetscSFNode *iremote; 2525 PetscSFNode *tmp_rootdata; 2526 PetscSFNode *tmp_leafdata = &iremote_new[leafArrayOffsets[s]]; 2527 2528 PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote)); 2529 PetscCall(PetscSFCreate(comm, &tmp_sf)); 2530 /* create helper SF with contiguous leaves */ 2531 PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 2532 PetscCall(PetscSFSetUp(tmp_sf)); 2533 PetscCall(PetscMalloc1(nr, &tmp_rootdata)); 2534 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) { 2535 for (i = 0; i < nr; i++) { 2536 tmp_rootdata[i].index = i + rootOffset; 2537 tmp_rootdata[i].rank = (PetscInt)rank; 2538 } 2539 rootOffset += nr; 2540 } else { 2541 for (i = 0; i < nr; i++) { 2542 tmp_rootdata[i].index = i; 2543 tmp_rootdata[i].rank = (PetscInt)rank; 2544 } 2545 } 2546 PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2547 PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2548 PetscCall(PetscSFDestroy(&tmp_sf)); 2549 PetscCall(PetscFree(tmp_rootdata)); 2550 } 2551 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above 2552 2553 /* Build the new SF */ 2554 PetscCall(PetscSFCreate(comm, newsf)); 2555 PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER)); 2556 } else { 2557 /* Build the new SF */ 2558 
PetscCall(PetscSFCreate(comm, newsf)); 2559 PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote)); 2560 } 2561 PetscCall(PetscSFSetUp(*newsf)); 2562 PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view")); 2563 PetscCall(PetscLayoutDestroy(&glayout)); 2564 PetscCall(PetscFree(gremote)); 2565 PetscCall(PetscFree(leafArrayOffsets)); 2566 PetscFunctionReturn(PETSC_SUCCESS); 2567 } 2568