#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/hashseti.h>
#include <petsc/private/viewerimpl.h>
#include <petsc/private/hashmapi.h>

#if defined(PETSC_HAVE_CUDA)
  #include <cuda_runtime.h>
#endif

#if defined(PETSC_HAVE_HIP)
  #include <hip/hip_runtime.h>
#endif

#if defined(PETSC_CLANG_STATIC_ANALYZER)
void PetscSFCheckGraphSet(PetscSF, int);
#else
  #if defined(PETSC_USE_DEBUG)
    #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME);
  #else
    #define PetscSFCheckGraphSet(sf, arg) \
      do { \
      } while (0)
  #endif
#endif

const char *const PetscSFDuplicateOptions[]     = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL};

/*@
  PetscSFCreate - create a star forest communication context

  Collective

  Input Parameter:
. comm - communicator on which the star forest will operate

  Output Parameter:
. sf - new star forest context

  Options Database Key:
. -sf_type type - value of type may be
.vb
    basic    - Use MPI persistent Isend/Irecv for communication (Default)
    window   - Use MPI-3 one-sided window for communication
    neighbor - Use MPI-3 neighborhood collectives for communication
.ve

  Level: intermediate

  Note:
  When one knows the communication graph is one of the predefined graphs, such as `MPI_Alltoall()`, `MPI_Allgatherv()`,
  `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
  `SF`s are optimized and have better performance than the general `SF`s.
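  Example Usage:
  A minimal sketch of the typical lifecycle (error handling omitted; the communicator, the sizes of 2, and the
  data arrays are illustrative placeholders, and `iremote[]` must be filled by the caller to describe the graph):
.vb
  PetscSF     sf;
  PetscInt    rootdata[2] = {10, 20}, leafdata[2] = {0, 0};
  PetscSFNode iremote[2];

  PetscSFCreate(PETSC_COMM_WORLD, &sf);
  /* ... fill iremote[] with the (rank, index) of the root each leaf references ... */
  PetscSFSetGraph(sf, 2, 2, NULL, PETSC_COPY_VALUES, iremote, PETSC_COPY_VALUES);
  PetscSFSetFromOptions(sf);
  PetscSFBcastBegin(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE);
  PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE);
  PetscSFDestroy(&sf);
.ve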
54 55 .seealso: `PetscSF`, `PetscSFSetType`, PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()` 56 @*/ 57 PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf) 58 { 59 PetscSF b; 60 61 PetscFunctionBegin; 62 PetscValidPointer(sf, 2); 63 PetscCall(PetscSFInitializePackage()); 64 65 PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView)); 66 67 b->nroots = -1; 68 b->nleaves = -1; 69 b->minleaf = PETSC_MAX_INT; 70 b->maxleaf = PETSC_MIN_INT; 71 b->nranks = -1; 72 b->rankorder = PETSC_TRUE; 73 b->ingroup = MPI_GROUP_NULL; 74 b->outgroup = MPI_GROUP_NULL; 75 b->graphset = PETSC_FALSE; 76 #if defined(PETSC_HAVE_DEVICE) 77 b->use_gpu_aware_mpi = use_gpu_aware_mpi; 78 b->use_stream_aware_mpi = PETSC_FALSE; 79 b->unknown_input_stream = PETSC_FALSE; 80 #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 81 b->backend = PETSCSF_BACKEND_KOKKOS; 82 #elif defined(PETSC_HAVE_CUDA) 83 b->backend = PETSCSF_BACKEND_CUDA; 84 #elif defined(PETSC_HAVE_HIP) 85 b->backend = PETSCSF_BACKEND_HIP; 86 #endif 87 88 #if defined(PETSC_HAVE_NVSHMEM) 89 b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */ 90 b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */ 91 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL)); 92 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL)); 93 #endif 94 #endif 95 b->vscat.from_n = -1; 96 b->vscat.to_n = -1; 97 b->vscat.unit = MPIU_SCALAR; 98 *sf = b; 99 PetscFunctionReturn(PETSC_SUCCESS); 100 } 101 102 /*@ 103 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 104 105 Collective 106 107 Input Parameter: 108 . sf - star forest 109 110 Level: advanced 111 112 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()` 113 @*/ 114 PetscErrorCode PetscSFReset(PetscSF sf) 115 { 116 PetscFunctionBegin; 117 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 118 PetscTryTypeMethod(sf, Reset); 119 sf->nroots = -1; 120 sf->nleaves = -1; 121 sf->minleaf = PETSC_MAX_INT; 122 sf->maxleaf = PETSC_MIN_INT; 123 sf->mine = NULL; 124 sf->remote = NULL; 125 sf->graphset = PETSC_FALSE; 126 PetscCall(PetscFree(sf->mine_alloc)); 127 PetscCall(PetscFree(sf->remote_alloc)); 128 sf->nranks = -1; 129 PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote)); 130 sf->degreeknown = PETSC_FALSE; 131 PetscCall(PetscFree(sf->degree)); 132 if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup)); 133 if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup)); 134 if (sf->multi) sf->multi->multi = NULL; 135 PetscCall(PetscSFDestroy(&sf->multi)); 136 PetscCall(PetscLayoutDestroy(&sf->map)); 137 138 #if defined(PETSC_HAVE_DEVICE) 139 for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i])); 140 #endif 141 142 sf->setupcalled = PETSC_FALSE; 143 PetscFunctionReturn(PETSC_SUCCESS); 144 } 145 146 /*@C 147 PetscSFSetType - Set the `PetscSF` communication implementation 148 149 Collective 150 151 Input Parameters: 152 + sf - the `PetscSF` context 153 - type - a known method 154 .vb 155 PETSCSFWINDOW - MPI-2/3 one-sided 156 PETSCSFBASIC - basic implementation using MPI-1 two-sided 157 .ve 158 159 Options Database Key: 160 . 
-sf_type <type> - Sets the method; for example `basic` or `window` use -help for a list of available methods 161 162 Level: intermediate 163 164 Notes: 165 See `PetscSFType` for possible values 166 167 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()` 168 @*/ 169 PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type) 170 { 171 PetscBool match; 172 PetscErrorCode (*r)(PetscSF); 173 174 PetscFunctionBegin; 175 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 176 PetscValidCharPointer(type, 2); 177 178 PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match)); 179 if (match) PetscFunctionReturn(PETSC_SUCCESS); 180 181 PetscCall(PetscFunctionListFind(PetscSFList, type, &r)); 182 PetscCheck(r, PETSC_COMM_SELF, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type); 183 /* Destroy the previous PetscSF implementation context */ 184 PetscTryTypeMethod(sf, Destroy); 185 PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops))); 186 PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type)); 187 PetscCall((*r)(sf)); 188 PetscFunctionReturn(PETSC_SUCCESS); 189 } 190 191 /*@C 192 PetscSFGetType - Get the `PetscSF` communication implementation 193 194 Not Collective 195 196 Input Parameter: 197 . sf - the `PetscSF` context 198 199 Output Parameter: 200 . type - the `PetscSF` type name 201 202 Level: intermediate 203 204 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()` 205 @*/ 206 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 207 { 208 PetscFunctionBegin; 209 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 210 PetscValidPointer(type, 2); 211 *type = ((PetscObject)sf)->type_name; 212 PetscFunctionReturn(PETSC_SUCCESS); 213 } 214 215 /*@C 216 PetscSFDestroy - destroy a star forest 217 218 Collective 219 220 Input Parameter: 221 . sf - address of star forest 222 223 Level: intermediate 224 225 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()` 226 @*/ 227 PetscErrorCode PetscSFDestroy(PetscSF *sf) 228 { 229 PetscFunctionBegin; 230 if (!*sf) PetscFunctionReturn(PETSC_SUCCESS); 231 PetscValidHeaderSpecific((*sf), PETSCSF_CLASSID, 1); 232 if (--((PetscObject)(*sf))->refct > 0) { 233 *sf = NULL; 234 PetscFunctionReturn(PETSC_SUCCESS); 235 } 236 PetscCall(PetscSFReset(*sf)); 237 PetscTryTypeMethod((*sf), Destroy); 238 PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf)); 239 if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit)); 240 PetscCall(PetscHeaderDestroy(sf)); 241 PetscFunctionReturn(PETSC_SUCCESS); 242 } 243 244 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 245 { 246 PetscInt i, nleaves; 247 PetscMPIInt size; 248 const PetscInt *ilocal; 249 const PetscSFNode *iremote; 250 251 PetscFunctionBegin; 252 if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS); 253 PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote)); 254 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 255 for (i = 0; i < nleaves; i++) { 256 const PetscInt rank = iremote[i].rank; 257 const PetscInt remote = iremote[i].index; 258 const PetscInt leaf = ilocal ? 
ilocal[i] : i; 259 PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size); 260 PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i); 261 PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i); 262 } 263 PetscFunctionReturn(PETSC_SUCCESS); 264 } 265 266 /*@ 267 PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication 268 269 Collective 270 271 Input Parameter: 272 . sf - star forest communication object 273 274 Level: beginner 275 276 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()` 277 @*/ 278 PetscErrorCode PetscSFSetUp(PetscSF sf) 279 { 280 PetscFunctionBegin; 281 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 282 PetscSFCheckGraphSet(sf, 1); 283 if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS); 284 PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0)); 285 PetscCall(PetscSFCheckGraphValid_Private(sf)); 286 if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */ 287 PetscTryTypeMethod(sf, SetUp); 288 #if defined(PETSC_HAVE_CUDA) 289 if (sf->backend == PETSCSF_BACKEND_CUDA) { 290 sf->ops->Malloc = PetscSFMalloc_CUDA; 291 sf->ops->Free = PetscSFFree_CUDA; 292 } 293 #endif 294 #if defined(PETSC_HAVE_HIP) 295 if (sf->backend == PETSCSF_BACKEND_HIP) { 296 sf->ops->Malloc = PetscSFMalloc_HIP; 297 sf->ops->Free = PetscSFFree_HIP; 298 } 299 #endif 300 301 # 302 #if defined(PETSC_HAVE_KOKKOS) 303 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 304 sf->ops->Malloc = PetscSFMalloc_Kokkos; 305 sf->ops->Free = PetscSFFree_Kokkos; 306 } 307 #endif 308 PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0)); 309 sf->setupcalled = PETSC_TRUE; 310 PetscFunctionReturn(PETSC_SUCCESS); 311 } 312 313 /*@ 314 PetscSFSetFromOptions - set `PetscSF` options using the options database 315 316 Logically Collective 317 318 Input Parameter: 319 . sf - star forest 320 321 Options Database Keys: 322 + -sf_type - implementation type, see `PetscSFSetType()` 323 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 324 . -sf_use_default_stream - Assume callers of `PetscSF` computed the input root/leafdata with the default CUDA stream. `PetscSF` will also 325 use the default stream to process data. Therefore, no stream synchronization is needed between `PetscSF` and its caller (default: true). 326 If true, this option only works with `-use_gpu_aware_mpi 1`. 327 . -sf_use_stream_aware_mpi - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false). 328 If true, this option only works with `-use_gpu_aware_mpi 1`. 329 330 - -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently `PetscSF` has these backends: cuda, hip and Kokkos. 331 On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices, 332 the only available is kokkos. 
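  Example Usage:
  A minimal sketch of selecting the implementation and device backend from the command line (the executable name
  `./app` is a placeholder; which backends are accepted depends on how PETSc was configured):
.vb
  ./app -sf_type neighbor -sf_backend kokkos
.ve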
333 334 Level: intermediate 335 336 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()` 337 @*/ 338 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 339 { 340 PetscSFType deft; 341 char type[256]; 342 PetscBool flg; 343 344 PetscFunctionBegin; 345 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 346 PetscObjectOptionsBegin((PetscObject)sf); 347 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 348 PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg)); 349 PetscCall(PetscSFSetType(sf, flg ? type : deft)); 350 PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL)); 351 #if defined(PETSC_HAVE_DEVICE) 352 { 353 char backendstr[32] = {0}; 354 PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set; 355 /* Change the defaults set in PetscSFCreate() with command line options */ 356 PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL)); 357 PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL)); 358 PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set)); 359 PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda)); 360 PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos)); 361 PetscCall(PetscStrcasecmp("hip", backendstr, &isHip)); 362 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 363 if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 364 else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 365 else if (isHip) sf->backend = PETSCSF_BACKEND_HIP; 366 else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr); 367 #elif defined(PETSC_HAVE_KOKKOS) 368 PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. 
You can only choose kokkos", backendstr); 369 #endif 370 } 371 #endif 372 PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject); 373 PetscOptionsEnd(); 374 PetscFunctionReturn(PETSC_SUCCESS); 375 } 376 377 /*@ 378 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 379 380 Logically Collective 381 382 Input Parameters: 383 + sf - star forest 384 - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic) 385 386 Level: advanced 387 388 .seealso: `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()` 389 @*/ 390 PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg) 391 { 392 PetscFunctionBegin; 393 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 394 PetscValidLogicalCollectiveBool(sf, flg, 2); 395 PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 396 sf->rankorder = flg; 397 PetscFunctionReturn(PETSC_SUCCESS); 398 } 399 400 /*@C 401 PetscSFSetGraph - Set a parallel star forest 402 403 Collective 404 405 Input Parameters: 406 + sf - star forest 407 . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 408 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 409 . ilocal - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage (locations must be >= 0, enforced 410 during setup in debug mode) 411 . localmode - copy mode for `ilocal` 412 . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 413 during setup in debug mode) 414 - remotemode - copy mode for `iremote` 415 416 Level: intermediate 417 418 Notes: 419 Leaf indices in `ilocal` must be unique, otherwise an error occurs. 420 421 Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics. 422 In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`, 423 PETSc might modify the respective array; 424 if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`. 425 Only if `PETSC_COPY_VALUES` is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed). 426 427 Fortran Note: 428 In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`. 429 430 Developer Note: 431 We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf. 432 This also allows to compare leaf sets of two `PetscSF`s easily. 
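  Example Usage:
  A minimal sketch for a two-process communicator in which each process owns two roots and attaches two contiguous
  leaves (so `ilocal` is `NULL`), each referencing a root on the other process; `sf` is assumed to have been created
  with `PetscSFCreate()` and error handling is omitted:
.vb
  PetscSFNode iremote[2];
  PetscMPIInt rank;

  MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
  iremote[0].rank  = 1 - rank; /* leaf 0 references root 0 on the other process */
  iremote[0].index = 0;
  iremote[1].rank  = 1 - rank; /* leaf 1 references root 1 on the other process */
  iremote[1].index = 1;
  PetscSFSetGraph(sf, 2, 2, NULL, PETSC_COPY_VALUES, iremote, PETSC_COPY_VALUES);
.ve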
433 434 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()` 435 @*/ 436 PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode) 437 { 438 PetscBool unique, contiguous; 439 440 PetscFunctionBegin; 441 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 442 if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal, 4); 443 if (nleaves > 0) PetscValidPointer(iremote, 6); 444 PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots); 445 PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves); 446 /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast 447 * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */ 448 PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode); 449 PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode); 450 451 if (sf->nroots >= 0) { /* Reset only if graph already set */ 452 PetscCall(PetscSFReset(sf)); 453 } 454 455 PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0)); 456 457 sf->nroots = nroots; 458 sf->nleaves = nleaves; 459 460 if (localmode == PETSC_COPY_VALUES && ilocal) { 461 PetscInt *tlocal = NULL; 462 463 PetscCall(PetscMalloc1(nleaves, &tlocal)); 464 PetscCall(PetscArraycpy(tlocal, ilocal, nleaves)); 465 ilocal = tlocal; 466 } 467 if (remotemode == PETSC_COPY_VALUES) { 468 PetscSFNode *tremote = NULL; 469 470 PetscCall(PetscMalloc1(nleaves, &tremote)); 471 PetscCall(PetscArraycpy(tremote, iremote, nleaves)); 472 iremote = tremote; 473 } 474 475 if (nleaves && ilocal) { 476 PetscSFNode work; 477 478 PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work)); 479 PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique)); 480 unique = PetscNot(unique); 481 PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF"); 482 sf->minleaf = ilocal[0]; 483 sf->maxleaf = ilocal[nleaves - 1]; 484 contiguous = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1); 485 } else { 486 sf->minleaf = 0; 487 sf->maxleaf = nleaves - 1; 488 unique = PETSC_TRUE; 489 contiguous = PETSC_TRUE; 490 } 491 492 if (contiguous) { 493 if (localmode == PETSC_USE_POINTER) { 494 ilocal = NULL; 495 } else { 496 PetscCall(PetscFree(ilocal)); 497 } 498 } 499 sf->mine = ilocal; 500 if (localmode == PETSC_USE_POINTER) { 501 sf->mine_alloc = NULL; 502 } else { 503 sf->mine_alloc = ilocal; 504 } 505 sf->remote = iremote; 506 if (remotemode == PETSC_USE_POINTER) { 507 sf->remote_alloc = NULL; 508 } else { 509 sf->remote_alloc = iremote; 510 } 511 PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0)); 512 sf->graphset = PETSC_TRUE; 513 PetscFunctionReturn(PETSC_SUCCESS); 514 } 515 516 /*@ 517 PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern 518 519 Collective 520 521 Input Parameters: 522 + sf - The `PetscSF` 523 . 
map - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`) 524 - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL` 525 526 Level: intermediate 527 528 Notes: 529 It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `x` and its `PetscLayout` is `map`. 530 `n` and `N` are the local and global sizes of `x` respectively. 531 532 With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to 533 sequential vectors `y` on all MPI processes. 534 535 With `PETSCSF_PATTERN_GATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to a 536 sequential vector `y` on rank 0. 537 538 In above cases, entries of `x` are roots and entries of `y` are leaves. 539 540 With `PETSCSF_PATTERN_ALLTOALL`, map is insignificant. Suppose NP is size of `sf`'s communicator. The routine 541 creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 542 of rank j. Here 0 <=i,j<NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that it does 543 not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data 544 items with `MPI_Type_contiguous` and use that as the <unit> argument in SF routines. 545 546 In this case, roots and leaves are symmetric. 547 548 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()` 549 @*/ 550 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern) 551 { 552 MPI_Comm comm; 553 PetscInt n, N, res[2]; 554 PetscMPIInt rank, size; 555 PetscSFType type; 556 557 PetscFunctionBegin; 558 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 559 if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map, 2); 560 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 561 PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern); 562 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 563 PetscCallMPI(MPI_Comm_size(comm, &size)); 564 565 if (pattern == PETSCSF_PATTERN_ALLTOALL) { 566 type = PETSCSFALLTOALL; 567 PetscCall(PetscLayoutCreate(comm, &sf->map)); 568 PetscCall(PetscLayoutSetLocalSize(sf->map, size)); 569 PetscCall(PetscLayoutSetSize(sf->map, ((PetscInt)size) * size)); 570 PetscCall(PetscLayoutSetUp(sf->map)); 571 } else { 572 PetscCall(PetscLayoutGetLocalSize(map, &n)); 573 PetscCall(PetscLayoutGetSize(map, &N)); 574 res[0] = n; 575 res[1] = -n; 576 /* Check if n are same over all ranks so that we can optimize it */ 577 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm)); 578 if (res[0] == -res[1]) { /* same n */ 579 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 580 } else { 581 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 582 } 583 PetscCall(PetscLayoutReference(map, &sf->map)); 584 } 585 PetscCall(PetscSFSetType(sf, type)); 586 587 sf->pattern = pattern; 588 sf->mine = NULL; /* Contiguous */ 589 590 /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 591 Also set other easy stuff. 
592 */ 593 if (pattern == PETSCSF_PATTERN_ALLGATHER) { 594 sf->nleaves = N; 595 sf->nroots = n; 596 sf->nranks = size; 597 sf->minleaf = 0; 598 sf->maxleaf = N - 1; 599 } else if (pattern == PETSCSF_PATTERN_GATHER) { 600 sf->nleaves = rank ? 0 : N; 601 sf->nroots = n; 602 sf->nranks = rank ? 0 : size; 603 sf->minleaf = 0; 604 sf->maxleaf = rank ? -1 : N - 1; 605 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 606 sf->nleaves = size; 607 sf->nroots = size; 608 sf->nranks = size; 609 sf->minleaf = 0; 610 sf->maxleaf = size - 1; 611 } 612 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 613 sf->graphset = PETSC_TRUE; 614 PetscFunctionReturn(PETSC_SUCCESS); 615 } 616 617 /*@ 618 PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map 619 620 Collective 621 622 Input Parameter: 623 . sf - star forest to invert 624 625 Output Parameter: 626 . isf - inverse of `sf` 627 628 Level: advanced 629 630 Notes: 631 All roots must have degree 1. 632 633 The local space may be a permutation, but cannot be sparse. 634 635 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetGraph()` 636 @*/ 637 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf) 638 { 639 PetscMPIInt rank; 640 PetscInt i, nroots, nleaves, maxlocal, count, *newilocal; 641 const PetscInt *ilocal; 642 PetscSFNode *roots, *leaves; 643 644 PetscFunctionBegin; 645 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 646 PetscSFCheckGraphSet(sf, 1); 647 PetscValidPointer(isf, 2); 648 649 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL)); 650 maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */ 651 652 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 653 PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves)); 654 for (i = 0; i < maxlocal; i++) { 655 leaves[i].rank = rank; 656 leaves[i].index = i; 657 } 658 for (i = 0; i < nroots; i++) { 659 roots[i].rank = -1; 660 roots[i].index = -1; 661 } 662 PetscCall(PetscSFReduceBegin(sf, MPIU_2INT, leaves, roots, MPI_REPLACE)); 663 PetscCall(PetscSFReduceEnd(sf, MPIU_2INT, leaves, roots, MPI_REPLACE)); 664 665 /* Check whether our leaves are sparse */ 666 for (i = 0, count = 0; i < nroots; i++) 667 if (roots[i].rank >= 0) count++; 668 if (count == nroots) newilocal = NULL; 669 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal)); 670 for (i = 0, count = 0; i < nroots; i++) { 671 if (roots[i].rank >= 0) { 672 newilocal[count] = i; 673 roots[count].rank = roots[i].rank; 674 roots[count].index = roots[i].index; 675 count++; 676 } 677 } 678 } 679 680 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf)); 681 PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES)); 682 PetscCall(PetscFree2(roots, leaves)); 683 PetscFunctionReturn(PETSC_SUCCESS); 684 } 685 686 /*@ 687 PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph 688 689 Collective 690 691 Input Parameters: 692 + sf - communication object to duplicate 693 - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`) 694 695 Output Parameter: 696 . 
newsf - new communication object 697 698 Level: beginner 699 700 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()` 701 @*/ 702 PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf) 703 { 704 PetscSFType type; 705 MPI_Datatype dtype = MPIU_SCALAR; 706 707 PetscFunctionBegin; 708 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 709 PetscValidLogicalCollectiveEnum(sf, opt, 2); 710 PetscValidPointer(newsf, 3); 711 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf)); 712 PetscCall(PetscSFGetType(sf, &type)); 713 if (type) PetscCall(PetscSFSetType(*newsf, type)); 714 (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */ 715 if (opt == PETSCSF_DUPLICATE_GRAPH) { 716 PetscSFCheckGraphSet(sf, 1); 717 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 718 PetscInt nroots, nleaves; 719 const PetscInt *ilocal; 720 const PetscSFNode *iremote; 721 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 722 PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 723 } else { 724 PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern)); 725 } 726 } 727 /* Since oldtype is committed, so is newtype, according to MPI */ 728 if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype)); 729 (*newsf)->vscat.bs = sf->vscat.bs; 730 (*newsf)->vscat.unit = dtype; 731 (*newsf)->vscat.to_n = sf->vscat.to_n; 732 (*newsf)->vscat.from_n = sf->vscat.from_n; 733 /* Do not copy lsf. Build it on demand since it is rarely used */ 734 735 #if defined(PETSC_HAVE_DEVICE) 736 (*newsf)->backend = sf->backend; 737 (*newsf)->unknown_input_stream = sf->unknown_input_stream; 738 (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi; 739 (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi; 740 #endif 741 PetscTryTypeMethod(sf, Duplicate, opt, *newsf); 742 /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */ 743 PetscFunctionReturn(PETSC_SUCCESS); 744 } 745 746 /*@C 747 PetscSFGetGraph - Get the graph specifying a parallel star forest 748 749 Not Collective 750 751 Input Parameter: 752 . sf - star forest 753 754 Output Parameters: 755 + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 756 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 757 . ilocal - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage) 758 - iremote - remote locations of root vertices for each leaf on the current process 759 760 Level: intermediate 761 762 Notes: 763 We are not currently requiring that the graph is set, thus returning `nroots` = -1 if it has not been set yet 764 765 The returned `ilocal` and `iremote` might contain values in different order than the input ones in `PetscSFSetGraph()` 766 767 Fortran Notes: 768 The returned `iremote` array is a copy and must be deallocated after use. Consequently, if you 769 want to update the graph, you must call `PetscSFSetGraph()` after modifying the `iremote` array. 
770 771 To check for a `NULL` `ilocal` use 772 $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then 773 774 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()` 775 @*/ 776 PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote) 777 { 778 PetscFunctionBegin; 779 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 780 if (sf->ops->GetGraph) { 781 PetscCall((sf->ops->GetGraph)(sf, nroots, nleaves, ilocal, iremote)); 782 } else { 783 if (nroots) *nroots = sf->nroots; 784 if (nleaves) *nleaves = sf->nleaves; 785 if (ilocal) *ilocal = sf->mine; 786 if (iremote) *iremote = sf->remote; 787 } 788 PetscFunctionReturn(PETSC_SUCCESS); 789 } 790 791 /*@ 792 PetscSFGetLeafRange - Get the active leaf ranges 793 794 Not Collective 795 796 Input Parameter: 797 . sf - star forest 798 799 Output Parameters: 800 + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves. 801 - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves. 802 803 Level: developer 804 805 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 806 @*/ 807 PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf) 808 { 809 PetscFunctionBegin; 810 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 811 PetscSFCheckGraphSet(sf, 1); 812 if (minleaf) *minleaf = sf->minleaf; 813 if (maxleaf) *maxleaf = sf->maxleaf; 814 PetscFunctionReturn(PETSC_SUCCESS); 815 } 816 817 /*@C 818 PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database 819 820 Collective on A 821 822 Input Parameters: 823 + A - the star forest 824 . obj - Optional object that provides the prefix for the option names 825 - name - command line option 826 827 Level: intermediate 828 829 Note: 830 See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat` 831 832 .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()` 833 @*/ 834 PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[]) 835 { 836 PetscFunctionBegin; 837 PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1); 838 PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name)); 839 PetscFunctionReturn(PETSC_SUCCESS); 840 } 841 842 /*@C 843 PetscSFView - view a star forest 844 845 Collective 846 847 Input Parameters: 848 + sf - star forest 849 - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD` 850 851 Level: beginner 852 853 .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()` 854 @*/ 855 PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer) 856 { 857 PetscBool iascii; 858 PetscViewerFormat format; 859 860 PetscFunctionBegin; 861 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 862 if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer)); 863 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 864 PetscCheckSameComm(sf, 1, viewer, 2); 865 if (sf->graphset) PetscCall(PetscSFSetUp(sf)); 866 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 867 if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) { 868 PetscMPIInt rank; 869 PetscInt ii, i, j; 870 871 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer)); 872 PetscCall(PetscViewerASCIIPushTab(viewer)); 873 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 874 if (!sf->graphset) { 
875 PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n")); 876 PetscCall(PetscViewerASCIIPopTab(viewer)); 877 PetscFunctionReturn(PETSC_SUCCESS); 878 } 879 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 880 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 881 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n", rank, sf->nroots, sf->nleaves, sf->nranks)); 882 for (i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index)); 883 PetscCall(PetscViewerFlush(viewer)); 884 PetscCall(PetscViewerGetFormat(viewer, &format)); 885 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 886 PetscMPIInt *tmpranks, *perm; 887 PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm)); 888 PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks)); 889 for (i = 0; i < sf->nranks; i++) perm[i] = i; 890 PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm)); 891 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank)); 892 for (ii = 0; ii < sf->nranks; ii++) { 893 i = perm[ii]; 894 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i])); 895 for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j])); 896 } 897 PetscCall(PetscFree2(tmpranks, perm)); 898 } 899 PetscCall(PetscViewerFlush(viewer)); 900 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 901 } 902 PetscCall(PetscViewerASCIIPopTab(viewer)); 903 } 904 PetscTryTypeMethod(sf, View, viewer); 905 PetscFunctionReturn(PETSC_SUCCESS); 906 } 907 908 /*@C 909 PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process 910 911 Not Collective 912 913 Input Parameter: 914 . sf - star forest 915 916 Output Parameters: 917 + nranks - number of ranks referenced by local part 918 . ranks - [`nranks`] array of ranks 919 . roffset - [`nranks`+1] offset in `rmine`/`rremote` for each rank 920 . 
rmine - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote rank 921 - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote rank 922 923 Level: developer 924 925 .seealso: `PetscSF`, `PetscSFGetLeafRanks()` 926 @*/ 927 PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote) 928 { 929 PetscFunctionBegin; 930 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 931 PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 932 if (sf->ops->GetRootRanks) { 933 PetscCall((sf->ops->GetRootRanks)(sf, nranks, ranks, roffset, rmine, rremote)); 934 } else { 935 /* The generic implementation */ 936 if (nranks) *nranks = sf->nranks; 937 if (ranks) *ranks = sf->ranks; 938 if (roffset) *roffset = sf->roffset; 939 if (rmine) *rmine = sf->rmine; 940 if (rremote) *rremote = sf->rremote; 941 } 942 PetscFunctionReturn(PETSC_SUCCESS); 943 } 944 945 /*@C 946 PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 947 948 Not Collective 949 950 Input Parameter: 951 . sf - star forest 952 953 Output Parameters: 954 + niranks - number of leaf ranks referencing roots on this process 955 . iranks - [`niranks`] array of ranks 956 . ioffset - [`niranks`+1] offset in `irootloc` for each rank 957 - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf rank 958 959 Level: developer 960 961 .seealso: `PetscSF`, `PetscSFGetRootRanks()` 962 @*/ 963 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc) 964 { 965 PetscFunctionBegin; 966 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 967 PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 968 if (sf->ops->GetLeafRanks) { 969 PetscCall((sf->ops->GetLeafRanks)(sf, niranks, iranks, ioffset, irootloc)); 970 } else { 971 PetscSFType type; 972 PetscCall(PetscSFGetType(sf, &type)); 973 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 974 } 975 PetscFunctionReturn(PETSC_SUCCESS); 976 } 977 978 static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list) 979 { 980 PetscInt i; 981 for (i = 0; i < n; i++) { 982 if (needle == list[i]) return PETSC_TRUE; 983 } 984 return PETSC_FALSE; 985 } 986 987 /*@C 988 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations. 
989 990 Collective 991 992 Input Parameters: 993 + sf - `PetscSF` to set up; `PetscSFSetGraph()` must have been called 994 - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange) 995 996 Level: developer 997 998 .seealso: `PetscSF`, `PetscSFGetRootRanks()` 999 @*/ 1000 PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup) 1001 { 1002 PetscHMapI table; 1003 PetscHashIter pos; 1004 PetscMPIInt size, groupsize, *groupranks; 1005 PetscInt *rcount, *ranks; 1006 PetscInt i, irank = -1, orank = -1; 1007 1008 PetscFunctionBegin; 1009 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1010 PetscSFCheckGraphSet(sf, 1); 1011 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 1012 PetscCall(PetscHMapICreateWithSize(10, &table)); 1013 for (i = 0; i < sf->nleaves; i++) { 1014 /* Log 1-based rank */ 1015 PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES)); 1016 } 1017 PetscCall(PetscHMapIGetSize(table, &sf->nranks)); 1018 PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote)); 1019 PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks)); 1020 PetscHashIterBegin(table, pos); 1021 for (i = 0; i < sf->nranks; i++) { 1022 PetscHashIterGetKey(table, pos, ranks[i]); 1023 PetscHashIterGetVal(table, pos, rcount[i]); 1024 PetscHashIterNext(table, pos); 1025 ranks[i]--; /* Convert back to 0-based */ 1026 } 1027 PetscCall(PetscHMapIDestroy(&table)); 1028 1029 /* We expect that dgroup is reliably "small" while nranks could be large */ 1030 { 1031 MPI_Group group = MPI_GROUP_NULL; 1032 PetscMPIInt *dgroupranks; 1033 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 1034 PetscCallMPI(MPI_Group_size(dgroup, &groupsize)); 1035 PetscCall(PetscMalloc1(groupsize, &dgroupranks)); 1036 PetscCall(PetscMalloc1(groupsize, &groupranks)); 1037 for (i = 0; i < groupsize; i++) dgroupranks[i] = i; 1038 if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks)); 1039 PetscCallMPI(MPI_Group_free(&group)); 1040 PetscCall(PetscFree(dgroupranks)); 1041 } 1042 1043 /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */ 1044 for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) { 1045 for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */ 1046 if (InList(ranks[i], groupsize, groupranks)) break; 1047 } 1048 for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */ 1049 if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break; 1050 } 1051 if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */ 1052 PetscInt tmprank, tmpcount; 1053 1054 tmprank = ranks[i]; 1055 tmpcount = rcount[i]; 1056 ranks[i] = ranks[sf->ndranks]; 1057 rcount[i] = rcount[sf->ndranks]; 1058 ranks[sf->ndranks] = tmprank; 1059 rcount[sf->ndranks] = tmpcount; 1060 sf->ndranks++; 1061 } 1062 } 1063 PetscCall(PetscFree(groupranks)); 1064 PetscCall(PetscSortIntWithArray(sf->ndranks, ranks, rcount)); 1065 PetscCall(PetscSortIntWithArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks)); 1066 sf->roffset[0] = 0; 1067 for (i = 0; i < sf->nranks; i++) { 1068 PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i)); 1069 sf->roffset[i + 1] = sf->roffset[i] + rcount[i]; 1070 rcount[i] = 0; 1071 } 1072 for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) { 1073 /* short circuit */ 1074 if 
(orank != sf->remote[i].rank) { 1075 /* Search for index of iremote[i].rank in sf->ranks */ 1076 PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->ndranks, sf->ranks, &irank)); 1077 if (irank < 0) { 1078 PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank)); 1079 if (irank >= 0) irank += sf->ndranks; 1080 } 1081 orank = sf->remote[i].rank; 1082 } 1083 PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %" PetscInt_FMT " in array", sf->remote[i].rank); 1084 sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i; 1085 sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index; 1086 rcount[irank]++; 1087 } 1088 PetscCall(PetscFree2(rcount, ranks)); 1089 PetscFunctionReturn(PETSC_SUCCESS); 1090 } 1091 1092 /*@C 1093 PetscSFGetGroups - gets incoming and outgoing process groups 1094 1095 Collective 1096 1097 Input Parameter: 1098 . sf - star forest 1099 1100 Output Parameters: 1101 + incoming - group of origin processes for incoming edges (leaves that reference my roots) 1102 - outgoing - group of destination processes for outgoing edges (roots that I reference) 1103 1104 Level: developer 1105 1106 .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()` 1107 @*/ 1108 PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing) 1109 { 1110 MPI_Group group = MPI_GROUP_NULL; 1111 1112 PetscFunctionBegin; 1113 PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups"); 1114 if (sf->ingroup == MPI_GROUP_NULL) { 1115 PetscInt i; 1116 const PetscInt *indegree; 1117 PetscMPIInt rank, *outranks, *inranks; 1118 PetscSFNode *remote; 1119 PetscSF bgcount; 1120 1121 /* Compute the number of incoming ranks */ 1122 PetscCall(PetscMalloc1(sf->nranks, &remote)); 1123 for (i = 0; i < sf->nranks; i++) { 1124 remote[i].rank = sf->ranks[i]; 1125 remote[i].index = 0; 1126 } 1127 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount)); 1128 PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER)); 1129 PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree)); 1130 PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree)); 1131 /* Enumerate the incoming ranks */ 1132 PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks)); 1133 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 1134 for (i = 0; i < sf->nranks; i++) outranks[i] = rank; 1135 PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks)); 1136 PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks)); 1137 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 1138 PetscCallMPI(MPI_Group_incl(group, indegree[0], inranks, &sf->ingroup)); 1139 PetscCallMPI(MPI_Group_free(&group)); 1140 PetscCall(PetscFree2(inranks, outranks)); 1141 PetscCall(PetscSFDestroy(&bgcount)); 1142 } 1143 *incoming = sf->ingroup; 1144 1145 if (sf->outgroup == MPI_GROUP_NULL) { 1146 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 1147 PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup)); 1148 PetscCallMPI(MPI_Group_free(&group)); 1149 } 1150 *outgoing = sf->outgroup; 1151 PetscFunctionReturn(PETSC_SUCCESS); 1152 } 1153 1154 /*@ 1155 PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters 1156 1157 Collective 1158 1159 Input Parameter: 1160 . 
sf - star forest that may contain roots with 0 or with more than 1 vertex 1161 1162 Output Parameter: 1163 . multi - star forest with split roots, such that each root has degree exactly 1 1164 1165 Level: developer 1166 1167 Note: 1168 In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi 1169 directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming 1170 edge, it is a candidate for future optimization that might involve its removal. 1171 1172 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()` 1173 @*/ 1174 PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi) 1175 { 1176 PetscFunctionBegin; 1177 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1178 PetscValidPointer(multi, 2); 1179 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */ 1180 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi)); 1181 *multi = sf->multi; 1182 sf->multi->multi = sf->multi; 1183 PetscFunctionReturn(PETSC_SUCCESS); 1184 } 1185 if (!sf->multi) { 1186 const PetscInt *indegree; 1187 PetscInt i, *inoffset, *outones, *outoffset, maxlocal; 1188 PetscSFNode *remote; 1189 maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */ 1190 PetscCall(PetscSFComputeDegreeBegin(sf, &indegree)); 1191 PetscCall(PetscSFComputeDegreeEnd(sf, &indegree)); 1192 PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset)); 1193 inoffset[0] = 0; 1194 for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i]; 1195 for (i = 0; i < maxlocal; i++) outones[i] = 1; 1196 PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM)); 1197 PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM)); 1198 for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1199 if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */ 1200 for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp"); 1201 } 1202 PetscCall(PetscMalloc1(sf->nleaves, &remote)); 1203 for (i = 0; i < sf->nleaves; i++) { 1204 remote[i].rank = sf->remote[i].rank; 1205 remote[i].index = outoffset[sf->mine ? 
sf->mine[i] : i]; 1206 } 1207 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi)); 1208 sf->multi->multi = sf->multi; 1209 PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER)); 1210 if (sf->rankorder) { /* Sort the ranks */ 1211 PetscMPIInt rank; 1212 PetscInt *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree; 1213 PetscSFNode *newremote; 1214 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 1215 for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]); 1216 PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset)); 1217 for (i = 0; i < maxlocal; i++) outranks[i] = rank; 1218 PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE)); 1219 PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE)); 1220 /* Sort the incoming ranks at each vertex, build the inverse map */ 1221 for (i = 0; i < sf->nroots; i++) { 1222 PetscInt j; 1223 for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j; 1224 PetscCall(PetscSortIntWithArray(indegree[i], inranks + inoffset[i], tmpoffset)); 1225 for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1226 } 1227 PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE)); 1228 PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE)); 1229 PetscCall(PetscMalloc1(sf->nleaves, &newremote)); 1230 for (i = 0; i < sf->nleaves; i++) { 1231 newremote[i].rank = sf->remote[i].rank; 1232 newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i]; 1233 } 1234 PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER)); 1235 PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset)); 1236 } 1237 PetscCall(PetscFree3(inoffset, outones, outoffset)); 1238 } 1239 *multi = sf->multi; 1240 PetscFunctionReturn(PETSC_SUCCESS); 1241 } 1242 1243 /*@C 1244 PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices 1245 1246 Collective 1247 1248 Input Parameters: 1249 + sf - original star forest 1250 . nselected - number of selected roots on this process 1251 - selected - indices of the selected roots on this process 1252 1253 Output Parameter: 1254 . esf - new star forest 1255 1256 Level: advanced 1257 1258 Note: 1259 To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can 1260 be done by calling PetscSFGetGraph(). 
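  Example Usage:
  A minimal sketch keeping only two roots of an existing `sf`; the leaves still participating in the new star forest
  can then be queried with `PetscSFGetGraph()` (the `selected` values are illustrative; error handling omitted):
.vb
  PetscSF        esf;
  const PetscInt selected[2] = {0, 3}; /* keep only roots 0 and 3 */

  PetscSFCreateEmbeddedRootSF(sf, 2, selected, &esf);
  /* ... communicate on esf ... */
  PetscSFDestroy(&esf);
.ve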
1261 1262 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1263 @*/ 1264 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf) 1265 { 1266 PetscInt i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal; 1267 const PetscInt *ilocal; 1268 signed char *rootdata, *leafdata, *leafmem; 1269 const PetscSFNode *iremote; 1270 PetscSFNode *new_iremote; 1271 MPI_Comm comm; 1272 1273 PetscFunctionBegin; 1274 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1275 PetscSFCheckGraphSet(sf, 1); 1276 if (nselected) PetscValidIntPointer(selected, 3); 1277 PetscValidPointer(esf, 4); 1278 1279 PetscCall(PetscSFSetUp(sf)); 1280 PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0)); 1281 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1282 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 1283 1284 if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */ 1285 PetscBool dups; 1286 PetscCall(PetscCheckDupsInt(nselected, selected, &dups)); 1287 PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups"); 1288 for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots); 1289 } 1290 1291 if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf); 1292 else { 1293 /* A generic version of creating embedded sf */ 1294 PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf)); 1295 maxlocal = maxleaf - minleaf + 1; 1296 PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem)); 1297 leafdata = leafmem - minleaf; 1298 /* Tag selected roots and bcast to leaves */ 1299 for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1; 1300 PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1301 PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1302 1303 /* Build esf with leaves that are still connected */ 1304 esf_nleaves = 0; 1305 for (i = 0; i < nleaves; i++) { 1306 j = ilocal ? ilocal[i] : i; 1307 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1308 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1309 */ 1310 esf_nleaves += (leafdata[j] ? 1 : 0); 1311 } 1312 PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal)); 1313 PetscCall(PetscMalloc1(esf_nleaves, &new_iremote)); 1314 for (i = n = 0; i < nleaves; i++) { 1315 j = ilocal ? ilocal[i] : i; 1316 if (leafdata[j]) { 1317 new_ilocal[n] = j; 1318 new_iremote[n].rank = iremote[i].rank; 1319 new_iremote[n].index = iremote[i].index; 1320 ++n; 1321 } 1322 } 1323 PetscCall(PetscSFCreate(comm, esf)); 1324 PetscCall(PetscSFSetFromOptions(*esf)); 1325 PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1326 PetscCall(PetscFree2(rootdata, leafmem)); 1327 } 1328 PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0)); 1329 PetscFunctionReturn(PETSC_SUCCESS); 1330 } 1331 1332 /*@C 1333 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices 1334 1335 Collective 1336 1337 Input Parameters: 1338 + sf - original star forest 1339 . nselected - number of selected leaves on this process 1340 - selected - indices of the selected leaves on this process 1341 1342 Output Parameter: 1343 . 
newsf - new star forest 1344 1345 Level: advanced 1346 1347 .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1348 @*/ 1349 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf) 1350 { 1351 const PetscSFNode *iremote; 1352 PetscSFNode *new_iremote; 1353 const PetscInt *ilocal; 1354 PetscInt i, nroots, *leaves, *new_ilocal; 1355 MPI_Comm comm; 1356 1357 PetscFunctionBegin; 1358 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1359 PetscSFCheckGraphSet(sf, 1); 1360 if (nselected) PetscValidIntPointer(selected, 3); 1361 PetscValidPointer(newsf, 4); 1362 1363 /* Uniq selected[] and put results in leaves[] */ 1364 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1365 PetscCall(PetscMalloc1(nselected, &leaves)); 1366 PetscCall(PetscArraycpy(leaves, selected, nselected)); 1367 PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves)); 1368 PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves); 1369 1370 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1371 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf); 1372 else { 1373 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote)); 1374 PetscCall(PetscMalloc1(nselected, &new_ilocal)); 1375 PetscCall(PetscMalloc1(nselected, &new_iremote)); 1376 for (i = 0; i < nselected; ++i) { 1377 const PetscInt l = leaves[i]; 1378 new_ilocal[i] = ilocal ? ilocal[l] : l; 1379 new_iremote[i].rank = iremote[l].rank; 1380 new_iremote[i].index = iremote[l].index; 1381 } 1382 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf)); 1383 PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1384 } 1385 PetscCall(PetscFree(leaves)); 1386 PetscFunctionReturn(PETSC_SUCCESS); 1387 } 1388 1389 /*@C 1390 PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()` 1391 1392 Collective 1393 1394 Input Parameters: 1395 + sf - star forest on which to communicate 1396 . unit - data type associated with each node 1397 . rootdata - buffer to broadcast 1398 - op - operation to use for reduction 1399 1400 Output Parameter: 1401 . leafdata - buffer to be reduced with values from each leaf's respective root 1402 1403 Level: intermediate 1404 1405 Note: 1406 When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1407 are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should 1408 use `PetscSFBcastWithMemTypeBegin()` instead. 
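  Example Usage:
  A minimal sketch that adds each root's value into the corresponding leaf entries, overlapping the communication
  with unrelated local work; `rootdata` and `leafdata` are caller-provided `PetscInt` arrays sized for the graph,
  and with `MPI_SUM` the leaf buffer must already hold valid values:
.vb
  PetscSFBcastBegin(sf, MPIU_INT, rootdata, leafdata, MPI_SUM);
  /* ... unrelated local computation ... */
  PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, MPI_SUM);
.ve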
1409 1410 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()` 1411 @*/ 1412 PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1413 { 1414 PetscMemType rootmtype, leafmtype; 1415 1416 PetscFunctionBegin; 1417 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1418 PetscCall(PetscSFSetUp(sf)); 1419 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1420 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1421 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1422 PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 1423 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1424 PetscFunctionReturn(PETSC_SUCCESS); 1425 } 1426 1427 /*@C 1428 PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call 1429 to `PetscSFBcastEnd()` 1430 1431 Collective 1432 1433 Input Parameters: 1434 + sf - star forest on which to communicate 1435 . unit - data type associated with each node 1436 . rootmtype - memory type of rootdata 1437 . rootdata - buffer to broadcast 1438 . leafmtype - memory type of leafdata 1439 - op - operation to use for reduction 1440 1441 Output Parameter: 1442 . leafdata - buffer to be reduced with values from each leaf's respective root 1443 1444 Level: intermediate 1445 1446 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()` 1447 @*/ 1448 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) 1449 { 1450 PetscFunctionBegin; 1451 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1452 PetscCall(PetscSFSetUp(sf)); 1453 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1454 PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 1455 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1456 PetscFunctionReturn(PETSC_SUCCESS); 1457 } 1458 1459 /*@C 1460 PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()` 1461 1462 Collective 1463 1464 Input Parameters: 1465 + sf - star forest 1466 . unit - data type 1467 . rootdata - buffer to broadcast 1468 - op - operation to use for reduction 1469 1470 Output Parameter: 1471 . leafdata - buffer to be reduced with values from each leaf's respective root 1472 1473 Level: intermediate 1474 1475 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()` 1476 @*/ 1477 PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1478 { 1479 PetscFunctionBegin; 1480 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1481 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1482 PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op); 1483 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1484 PetscFunctionReturn(PETSC_SUCCESS); 1485 } 1486 1487 /*@C 1488 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()` 1489 1490 Collective 1491 1492 Input Parameters: 1493 + sf - star forest 1494 . unit - data type 1495 . 
leafdata - values to reduce 1496 - op - reduction operation 1497 1498 Output Parameter: 1499 . rootdata - result of reduction of values from all leaves of each root 1500 1501 Level: intermediate 1502 1503 Note: 1504 When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1505 are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should 1506 use `PetscSFReduceWithMemTypeBegin()` instead. 1507 1508 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()` 1509 @*/ 1510 PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1511 { 1512 PetscMemType rootmtype, leafmtype; 1513 1514 PetscFunctionBegin; 1515 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1516 PetscCall(PetscSFSetUp(sf)); 1517 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1518 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1519 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1520 PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1521 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1522 PetscFunctionReturn(PETSC_SUCCESS); 1523 } 1524 1525 /*@C 1526 PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()` 1527 1528 Collective 1529 1530 Input Parameters: 1531 + sf - star forest 1532 . unit - data type 1533 . leafmtype - memory type of leafdata 1534 . leafdata - values to reduce 1535 . rootmtype - memory type of rootdata 1536 - op - reduction operation 1537 1538 Output Parameter: 1539 . rootdata - result of reduction of values from all leaves of each root 1540 1541 Level: intermediate 1542 1543 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()` 1544 @*/ 1545 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) 1546 { 1547 PetscFunctionBegin; 1548 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1549 PetscCall(PetscSFSetUp(sf)); 1550 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1551 PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1552 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1553 PetscFunctionReturn(PETSC_SUCCESS); 1554 } 1555 1556 /*@C 1557 PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()` 1558 1559 Collective 1560 1561 Input Parameters: 1562 + sf - star forest 1563 . unit - data type 1564 . leafdata - values to reduce 1565 - op - reduction operation 1566 1567 Output Parameter: 1568 . 
rootdata - result of reduction of values from all leaves of each root 1569 1570 Level: intermediate 1571 1572 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()` 1573 @*/ 1574 PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1575 { 1576 PetscFunctionBegin; 1577 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1578 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1579 PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op); 1580 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1581 PetscFunctionReturn(PETSC_SUCCESS); 1582 } 1583 1584 /*@C 1585 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, 1586 to be completed with `PetscSFFetchAndOpEnd()` 1587 1588 Collective 1589 1590 Input Parameters: 1591 + sf - star forest 1592 . unit - data type 1593 . leafdata - leaf values to use in reduction 1594 - op - operation to use for reduction 1595 1596 Output Parameters: 1597 + rootdata - root values to be updated, input state is seen by first process to perform an update 1598 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1599 1600 Level: advanced 1601 1602 Note: 1603 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1604 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1605 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1606 integers. 1607 1608 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()` 1609 @*/ 1610 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1611 { 1612 PetscMemType rootmtype, leafmtype, leafupdatemtype; 1613 1614 PetscFunctionBegin; 1615 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1616 PetscCall(PetscSFSetUp(sf)); 1617 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1618 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1619 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1620 PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype)); 1621 PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1622 PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 1623 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1624 PetscFunctionReturn(PETSC_SUCCESS); 1625 } 1626 1627 /*@C 1628 PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by 1629 applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()` 1630 1631 Collective 1632 1633 Input Parameters: 1634 + sf - star forest 1635 . unit - data type 1636 . rootmtype - memory type of rootdata 1637 . leafmtype - memory type of leafdata 1638 . leafdata - leaf values to use in reduction 1639 . 
leafupdatemtype - memory type of leafupdate 1640 - op - operation to use for reduction 1641 1642 Output Parameters: 1643 + rootdata - root values to be updated, input state is seen by first process to perform an update 1644 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1645 1646 Level: advanced 1647 1648 Note: 1649 See `PetscSFFetchAndOpBegin()` for more details. 1650 1651 .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()` 1652 @*/ 1653 PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op) 1654 { 1655 PetscFunctionBegin; 1656 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1657 PetscCall(PetscSFSetUp(sf)); 1658 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1659 PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1660 PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 1661 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1662 PetscFunctionReturn(PETSC_SUCCESS); 1663 } 1664 1665 /*@C 1666 PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()` 1667 to fetch values from roots and update atomically by applying operation using my leaf value 1668 1669 Collective 1670 1671 Input Parameters: 1672 + sf - star forest 1673 . unit - data type 1674 . leafdata - leaf values to use in reduction 1675 - op - operation to use for reduction 1676 1677 Output Parameters: 1678 + rootdata - root values to be updated, input state is seen by first process to perform an update 1679 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1680 1681 Level: advanced 1682 1683 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()` 1684 @*/ 1685 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1686 { 1687 PetscFunctionBegin; 1688 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1689 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0)); 1690 PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op); 1691 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0)); 1692 PetscFunctionReturn(PETSC_SUCCESS); 1693 } 1694 1695 /*@C 1696 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()` 1697 1698 Collective 1699 1700 Input Parameter: 1701 . sf - star forest 1702 1703 Output Parameter: 1704 . degree - degree of each root vertex 1705 1706 Level: advanced 1707 1708 Note: 1709 The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it. 
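  Example:
  A minimal sketch (illustrative only) of the Begin/End pairing; the returned array is valid only after `PetscSFComputeDegreeEnd()` completes:
.vb
  const PetscInt *degree;
  PetscInt        i, nroots, nconnections = 0;

  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
  PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
  for (i = 0; i < nroots; i++) nconnections += degree[i]; // total number of leaves attached to local roots
.ve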
1710 1711 .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()` 1712 @*/ 1713 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt **degree) 1714 { 1715 PetscFunctionBegin; 1716 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1717 PetscSFCheckGraphSet(sf, 1); 1718 PetscValidPointer(degree, 2); 1719 if (!sf->degreeknown) { 1720 PetscInt i, nroots = sf->nroots, maxlocal; 1721 PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1722 maxlocal = sf->maxleaf - sf->minleaf + 1; 1723 PetscCall(PetscMalloc1(nroots, &sf->degree)); 1724 PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1725 for (i = 0; i < nroots; i++) sf->degree[i] = 0; 1726 for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1; 1727 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 1728 } 1729 *degree = NULL; 1730 PetscFunctionReturn(PETSC_SUCCESS); 1731 } 1732 1733 /*@C 1734 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()` 1735 1736 Collective 1737 1738 Input Parameter: 1739 . sf - star forest 1740 1741 Output Parameter: 1742 . degree - degree of each root vertex 1743 1744 Level: developer 1745 1746 Note: 1747 The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it. 1748 1749 .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()` 1750 @*/ 1751 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree) 1752 { 1753 PetscFunctionBegin; 1754 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1755 PetscSFCheckGraphSet(sf, 1); 1756 PetscValidPointer(degree, 2); 1757 if (!sf->degreeknown) { 1758 PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 1759 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 1760 PetscCall(PetscFree(sf->degreetmp)); 1761 sf->degreeknown = PETSC_TRUE; 1762 } 1763 *degree = sf->degree; 1764 PetscFunctionReturn(PETSC_SUCCESS); 1765 } 1766 1767 /*@C 1768 PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`). 1769 Each multi-root is assigned index of the corresponding original root. 1770 1771 Collective 1772 1773 Input Parameters: 1774 + sf - star forest 1775 - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()` 1776 1777 Output Parameters: 1778 + nMultiRoots - (optional) number of multi-roots (roots of multi-`PetscSF`) 1779 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots` 1780 1781 Level: developer 1782 1783 Note: 1784 The returned array `multiRootsOrigNumbering` is newly allocated and should be destroyed with `PetscFree()` when no longer needed. 
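  Example:
  A minimal sketch (illustrative only) combining the degree computation with this routine:
.vb
  const PetscInt *degree;
  PetscInt       *origNumbering, nmroots;

  PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
  PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
  PetscCall(PetscSFComputeMultiRootOriginalNumbering(sf, degree, &nmroots, &origNumbering));
  // origNumbering[m] is the original root index of multi-root m, for m = 0 .. nmroots-1
  PetscCall(PetscFree(origNumbering));
.ve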
1785 1786 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()` 1787 @*/ 1788 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1789 { 1790 PetscSF msf; 1791 PetscInt i, j, k, nroots, nmroots; 1792 1793 PetscFunctionBegin; 1794 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1795 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL)); 1796 if (nroots) PetscValidIntPointer(degree, 2); 1797 if (nMultiRoots) PetscValidIntPointer(nMultiRoots, 3); 1798 PetscValidPointer(multiRootsOrigNumbering, 4); 1799 PetscCall(PetscSFGetMultiSF(sf, &msf)); 1800 PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL)); 1801 PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering)); 1802 for (i = 0, j = 0, k = 0; i < nroots; i++) { 1803 if (!degree[i]) continue; 1804 for (j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i; 1805 } 1806 PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail"); 1807 if (nMultiRoots) *nMultiRoots = nmroots; 1808 PetscFunctionReturn(PETSC_SUCCESS); 1809 } 1810 1811 /*@C 1812 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()` 1813 1814 Collective 1815 1816 Input Parameters: 1817 + sf - star forest 1818 . unit - data type 1819 - leafdata - leaf data to gather to roots 1820 1821 Output Parameter: 1822 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1823 1824 Level: intermediate 1825 1826 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()` 1827 @*/ 1828 PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) 1829 { 1830 PetscSF multi = NULL; 1831 1832 PetscFunctionBegin; 1833 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1834 PetscCall(PetscSFSetUp(sf)); 1835 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1836 PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 1837 PetscFunctionReturn(PETSC_SUCCESS); 1838 } 1839 1840 /*@C 1841 PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()` 1842 1843 Collective 1844 1845 Input Parameters: 1846 + sf - star forest 1847 . unit - data type 1848 - leafdata - leaf data to gather to roots 1849 1850 Output Parameter: 1851 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1852 1853 Level: intermediate 1854 1855 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()` 1856 @*/ 1857 PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) 1858 { 1859 PetscSF multi = NULL; 1860 1861 PetscFunctionBegin; 1862 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1863 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1864 PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 1865 PetscFunctionReturn(PETSC_SUCCESS); 1866 } 1867 1868 /*@C 1869 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()` 1870 1871 Collective 1872 1873 Input Parameters: 1874 + sf - star forest 1875 . unit - data type 1876 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1877 1878 Output Parameter: 1879 . 
leafdata - leaf data to be updated with personal data from each respective root
1880
1881 Level: intermediate
1882
1883 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
1884 @*/
1885 PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1886 {
1887 PetscSF multi = NULL;
1888
1889 PetscFunctionBegin;
1890 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1891 PetscCall(PetscSFSetUp(sf));
1892 PetscCall(PetscSFGetMultiSF(sf, &multi));
1893 PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1894 PetscFunctionReturn(PETSC_SUCCESS);
1895 }
1896
1897 /*@C
1898 PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`
1899
1900 Collective
1901
1902 Input Parameters:
1903 + sf - star forest
1904 . unit - data type
1905 - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1906
1907 Output Parameter:
1908 . leafdata - leaf data to be updated with personal data from each respective root
1909
1910 Level: intermediate
1911
1912 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
1913 @*/
1914 PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1915 {
1916 PetscSF multi = NULL;
1917
1918 PetscFunctionBegin;
1919 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1920 PetscCall(PetscSFGetMultiSF(sf, &multi));
1921 PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1922 PetscFunctionReturn(PETSC_SUCCESS);
1923 }
1924
1925 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1926 {
1927 PetscInt i, n, nleaves;
1928 const PetscInt *ilocal = NULL;
1929 PetscHSetI seen;
1930
1931 PetscFunctionBegin;
1932 if (PetscDefined(USE_DEBUG)) {
1933 PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
1934 PetscCall(PetscHSetICreate(&seen));
1935 for (i = 0; i < nleaves; i++) {
1936 const PetscInt leaf = ilocal ? ilocal[i] : i;
1937 PetscCall(PetscHSetIAdd(seen, leaf));
1938 }
1939 PetscCall(PetscHSetIGetSize(seen, &n));
1940 PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
1941 PetscCall(PetscHSetIDestroy(&seen));
1942 }
1943 PetscFunctionReturn(PETSC_SUCCESS);
1944 }
1945
1946 /*@
1947 PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view
1948
1949 Input Parameters:
1950 + sfA - The first `PetscSF`
1951 - sfB - The second `PetscSF`
1952
1953 Output Parameters:
1954 . sfBA - The composite `PetscSF`
1955
1956 Level: developer
1957
1958 Notes:
1959 Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
1960 forests, i.e. the same leaf is not connected with different roots.
1961
1962 `sfA`'s leaf space and `sfB`'s root space may partially overlap. The composition builds
1963 a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected
1964 nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a
1965 `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfB`, on connected nodes.
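  Example:
  A sketch of a typical call sequence (illustrative only; `rootdataA` is sized for the roots of `sfA` and `leafdataB` for the leaves of `sfB`):
.vb
  PetscSF sfBA;

  PetscCall(PetscSFCompose(sfA, sfB, &sfBA));
  PetscCall(PetscSFBcastBegin(sfBA, MPIU_SCALAR, rootdataA, leafdataB, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sfBA, MPIU_SCALAR, rootdataA, leafdataB, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sfBA));
.ve
  On connected nodes this delivers the same values as broadcasting through `sfA` and then through `sfB`.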
1966
1967 .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
1968 @*/
1969 PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
1970 {
1971 const PetscSFNode *remotePointsA, *remotePointsB;
1972 PetscSFNode *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
1973 const PetscInt *localPointsA, *localPointsB;
1974 PetscInt *localPointsBA;
1975 PetscInt i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
1976 PetscBool denseB;
1977
1978 PetscFunctionBegin;
1979 PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
1980 PetscSFCheckGraphSet(sfA, 1);
1981 PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
1982 PetscSFCheckGraphSet(sfB, 2);
1983 PetscCheckSameComm(sfA, 1, sfB, 2);
1984 PetscValidPointer(sfBA, 3);
1985 PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
1986 PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
1987
1988 PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
1989 PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
1990 /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size
1991 numRootsB; otherwise, garbage will be broadcast.
1992 Example (comm size = 1):
1993 sfA: 0 <- (0, 0)
1994 sfB: 100 <- (0, 0)
1995 101 <- (0, 1)
1996 Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target
1997 of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would
1998 receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on
1999 remotePointsA; if not recast, point 101 would receive a garbage value. */
2000 PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
2001 for (i = 0; i < numRootsB; i++) {
2002 reorderedRemotePointsA[i].rank = -1;
2003 reorderedRemotePointsA[i].index = -1;
2004 }
2005 for (i = 0; i < numLeavesA; i++) {
2006 PetscInt localp = localPointsA ? localPointsA[i] : i;
2007
2008 if (localp >= numRootsB) continue;
2009 reorderedRemotePointsA[localp] = remotePointsA[i];
2010 }
2011 remotePointsA = reorderedRemotePointsA;
2012 PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2013 PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
2014 for (i = 0; i < maxleaf - minleaf + 1; i++) {
2015 leafdataB[i].rank = -1;
2016 leafdataB[i].index = -1;
2017 }
2018 PetscCall(PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE));
2019 PetscCall(PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE));
2020 PetscCall(PetscFree(reorderedRemotePointsA));
2021
2022 denseB = (PetscBool)!localPointsB;
2023 for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
2024 if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
2025 else numLeavesBA++;
2026 }
2027 if (denseB) {
2028 localPointsBA = NULL;
2029 remotePointsBA = leafdataB;
2030 } else {
2031 PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
2032 PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
2033 for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
2034 const PetscInt l = localPointsB ?
localPointsB[i] : i;
2035
2036 if (leafdataB[l - minleaf].rank == -1) continue;
2037 remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
2038 localPointsBA[numLeavesBA] = l;
2039 numLeavesBA++;
2040 }
2041 PetscCall(PetscFree(leafdataB));
2042 }
2043 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
2044 PetscCall(PetscSFSetFromOptions(*sfBA));
2045 PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2046 PetscFunctionReturn(PETSC_SUCCESS);
2047 }
2048
2049 /*@
2050 PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one
2051
2052 Input Parameters:
2053 + sfA - The first `PetscSF`
2054 - sfB - The second `PetscSF`
2055
2056 Output Parameters:
2057 . sfBA - The composite `PetscSF`.
2058
2059 Level: developer
2060
2061 Notes:
2062 Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
2063 forests, i.e. the same leaf is not connected with different roots. Moreover, all roots of the
2064 second `PetscSF` must have a degree of 1, i.e., no root has more than one leaf connected.
2065
2066 `sfA`'s leaf space and `sfB`'s leaf space may partially overlap. The composition builds
2067 a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected
2068 roots are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()`
2069 on `sfA`, then
2070 a `PetscSFReduceBegin()`/`PetscSFReduceEnd()` on `sfB`, on connected roots.
2071
2072 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
2073 @*/
2074 PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2075 {
2076 const PetscSFNode *remotePointsA, *remotePointsB;
2077 PetscSFNode *remotePointsBA;
2078 const PetscInt *localPointsA, *localPointsB;
2079 PetscSFNode *reorderedRemotePointsA = NULL;
2080 PetscInt i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
2081 MPI_Op op;
2082 #if defined(PETSC_USE_64BIT_INDICES)
2083 PetscBool iswin;
2084 #endif
2085
2086 PetscFunctionBegin;
2087 PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
2088 PetscSFCheckGraphSet(sfA, 1);
2089 PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
2090 PetscSFCheckGraphSet(sfB, 2);
2091 PetscCheckSameComm(sfA, 1, sfB, 2);
2092 PetscValidPointer(sfBA, 3);
2093 PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2094 PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
2095
2096 PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2097 PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
2098
2099 /* TODO: Check roots of sfB have degree of 1 */
2100 /* Once we implement it, we can replace the MPI_MAXLOC
2101 with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
2102 We use MPI_MAXLOC only to have a deterministic output from this routine if
2103 the root condition is not met.
2104 */ 2105 op = MPI_MAXLOC; 2106 #if defined(PETSC_USE_64BIT_INDICES) 2107 /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */ 2108 PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin)); 2109 if (iswin) op = MPI_REPLACE; 2110 #endif 2111 2112 PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf)); 2113 PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA)); 2114 for (i = 0; i < maxleaf - minleaf + 1; i++) { 2115 reorderedRemotePointsA[i].rank = -1; 2116 reorderedRemotePointsA[i].index = -1; 2117 } 2118 if (localPointsA) { 2119 for (i = 0; i < numLeavesA; i++) { 2120 if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; 2121 reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i]; 2122 } 2123 } else { 2124 for (i = 0; i < numLeavesA; i++) { 2125 if (i > maxleaf || i < minleaf) continue; 2126 reorderedRemotePointsA[i - minleaf] = remotePointsA[i]; 2127 } 2128 } 2129 2130 PetscCall(PetscMalloc1(numRootsB, &localPointsBA)); 2131 PetscCall(PetscMalloc1(numRootsB, &remotePointsBA)); 2132 for (i = 0; i < numRootsB; i++) { 2133 remotePointsBA[i].rank = -1; 2134 remotePointsBA[i].index = -1; 2135 } 2136 2137 PetscCall(PetscSFReduceBegin(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op)); 2138 PetscCall(PetscSFReduceEnd(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op)); 2139 PetscCall(PetscFree(reorderedRemotePointsA)); 2140 for (i = 0, numLeavesBA = 0; i < numRootsB; i++) { 2141 if (remotePointsBA[i].rank == -1) continue; 2142 remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank; 2143 remotePointsBA[numLeavesBA].index = remotePointsBA[i].index; 2144 localPointsBA[numLeavesBA] = i; 2145 numLeavesBA++; 2146 } 2147 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA)); 2148 PetscCall(PetscSFSetFromOptions(*sfBA)); 2149 PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER)); 2150 PetscFunctionReturn(PETSC_SUCCESS); 2151 } 2152 2153 /* 2154 PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF` 2155 2156 Input Parameters: 2157 . sf - The global `PetscSF` 2158 2159 Output Parameters: 2160 . 
out - The local `PetscSF` 2161 2162 .seealso: `PetscSF`, `PetscSFCreate()` 2163 */ 2164 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out) 2165 { 2166 MPI_Comm comm; 2167 PetscMPIInt myrank; 2168 const PetscInt *ilocal; 2169 const PetscSFNode *iremote; 2170 PetscInt i, j, nroots, nleaves, lnleaves, *lilocal; 2171 PetscSFNode *liremote; 2172 PetscSF lsf; 2173 2174 PetscFunctionBegin; 2175 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2176 if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out); 2177 else { 2178 /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */ 2179 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 2180 PetscCallMPI(MPI_Comm_rank(comm, &myrank)); 2181 2182 /* Find out local edges and build a local SF */ 2183 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 2184 for (i = lnleaves = 0; i < nleaves; i++) { 2185 if (iremote[i].rank == (PetscInt)myrank) lnleaves++; 2186 } 2187 PetscCall(PetscMalloc1(lnleaves, &lilocal)); 2188 PetscCall(PetscMalloc1(lnleaves, &liremote)); 2189 2190 for (i = j = 0; i < nleaves; i++) { 2191 if (iremote[i].rank == (PetscInt)myrank) { 2192 lilocal[j] = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */ 2193 liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */ 2194 liremote[j].index = iremote[i].index; 2195 j++; 2196 } 2197 } 2198 PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf)); 2199 PetscCall(PetscSFSetFromOptions(lsf)); 2200 PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER)); 2201 PetscCall(PetscSFSetUp(lsf)); 2202 *out = lsf; 2203 } 2204 PetscFunctionReturn(PETSC_SUCCESS); 2205 } 2206 2207 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */ 2208 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata) 2209 { 2210 PetscMemType rootmtype, leafmtype; 2211 2212 PetscFunctionBegin; 2213 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2214 PetscCall(PetscSFSetUp(sf)); 2215 PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 2216 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 2217 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 2218 PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata); 2219 PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 2220 PetscFunctionReturn(PETSC_SUCCESS); 2221 } 2222 2223 /*@ 2224 PetscSFConcatenate - concatenate multiple `PetscSF` into one 2225 2226 Input Parameters: 2227 + comm - the communicator 2228 . nsfs - the number of input `PetscSF` 2229 . sfs - the array of input `PetscSF` 2230 . rootMode - the root mode specifying how roots are handled 2231 - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage 2232 2233 Output Parameters: 2234 . newsf - The resulting `PetscSF` 2235 2236 Level: advanced 2237 2238 Notes: 2239 The communicator of all `PetscSF`s in `sfs` must be comm. 2240 2241 Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order. 2242 2243 The offsets in `leafOffsets` are added to the original leaf indices. 2244 2245 If all input SFs use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well. 2246 In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`. 
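  For instance, concatenating two star forests that both use contiguous leaf storage could look like this (a sketch; `comm`, `sfs`, and `newsf` are user variables):
.vb
  PetscSF sfs[2], newsf;

  // ... create sfs[0] and sfs[1] on comm, both with ilocal = NULL ...
  PetscCall(PetscSFConcatenate(comm, 2, sfs, PETSCSF_CONCATENATE_ROOTMODE_GLOBAL, NULL, &newsf));
.ve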
2247
2248 If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s.
2249 In this case, the user is responsible for providing correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2250
2251 All root modes retain the essential connectivity condition.
2252 If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`.
2253 Parameter `rootMode` controls how the input root spaces are combined.
2254 For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode)
2255 and is also the same in the output `PetscSF`.
2256 For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined.
2257 `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally;
2258 roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input `PetscSF` index and original local index, and renumbered contiguously.
2259 `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally;
2260 roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously;
2261 the original root ranks are ignored.
2262 For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`,
2263 the output `PetscSF`'s root layout is such that the local number of roots is the sum of the input `PetscSF`s' local numbers of roots on each rank,
2264 to preserve the load balance.
2265 However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different ranks.
2266
2267 Example:
2268 We can use src/vec/is/sf/tests/ex18.c to compare the root modes.
By running 2269 .vb 2270 make -C $PETSC_DIR/src/vec/is/sf/tests ex18 2271 for m in {local,global,shared}; do 2272 mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view 2273 done 2274 .ve 2275 we generate two identical `PetscSF`s sf_0 and sf_1, 2276 .vb 2277 PetscSF Object: sf_0 2 MPI processes 2278 type: basic 2279 rank #leaves #roots 2280 [ 0] 4 2 2281 [ 1] 4 2 2282 leaves roots roots in global numbering 2283 ( 0, 0) <- ( 0, 0) = 0 2284 ( 0, 1) <- ( 0, 1) = 1 2285 ( 0, 2) <- ( 1, 0) = 2 2286 ( 0, 3) <- ( 1, 1) = 3 2287 ( 1, 0) <- ( 0, 0) = 0 2288 ( 1, 1) <- ( 0, 1) = 1 2289 ( 1, 2) <- ( 1, 0) = 2 2290 ( 1, 3) <- ( 1, 1) = 3 2291 .ve 2292 and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf: 2293 .vb 2294 rootMode = local: 2295 PetscSF Object: result_sf 2 MPI processes 2296 type: basic 2297 rank #leaves #roots 2298 [ 0] 8 4 2299 [ 1] 8 4 2300 leaves roots roots in global numbering 2301 ( 0, 0) <- ( 0, 0) = 0 2302 ( 0, 1) <- ( 0, 1) = 1 2303 ( 0, 2) <- ( 1, 0) = 4 2304 ( 0, 3) <- ( 1, 1) = 5 2305 ( 0, 4) <- ( 0, 2) = 2 2306 ( 0, 5) <- ( 0, 3) = 3 2307 ( 0, 6) <- ( 1, 2) = 6 2308 ( 0, 7) <- ( 1, 3) = 7 2309 ( 1, 0) <- ( 0, 0) = 0 2310 ( 1, 1) <- ( 0, 1) = 1 2311 ( 1, 2) <- ( 1, 0) = 4 2312 ( 1, 3) <- ( 1, 1) = 5 2313 ( 1, 4) <- ( 0, 2) = 2 2314 ( 1, 5) <- ( 0, 3) = 3 2315 ( 1, 6) <- ( 1, 2) = 6 2316 ( 1, 7) <- ( 1, 3) = 7 2317 2318 rootMode = global: 2319 PetscSF Object: result_sf 2 MPI processes 2320 type: basic 2321 rank #leaves #roots 2322 [ 0] 8 4 2323 [ 1] 8 4 2324 leaves roots roots in global numbering 2325 ( 0, 0) <- ( 0, 0) = 0 2326 ( 0, 1) <- ( 0, 1) = 1 2327 ( 0, 2) <- ( 0, 2) = 2 2328 ( 0, 3) <- ( 0, 3) = 3 2329 ( 0, 4) <- ( 1, 0) = 4 2330 ( 0, 5) <- ( 1, 1) = 5 2331 ( 0, 6) <- ( 1, 2) = 6 2332 ( 0, 7) <- ( 1, 3) = 7 2333 ( 1, 0) <- ( 0, 0) = 0 2334 ( 1, 1) <- ( 0, 1) = 1 2335 ( 1, 2) <- ( 0, 2) = 2 2336 ( 1, 3) <- ( 0, 3) = 3 2337 ( 1, 4) <- ( 1, 0) = 4 2338 ( 1, 5) <- ( 1, 1) = 5 2339 ( 1, 6) <- ( 1, 2) = 6 2340 ( 1, 7) <- ( 1, 3) = 7 2341 2342 rootMode = shared: 2343 PetscSF Object: result_sf 2 MPI processes 2344 type: basic 2345 rank #leaves #roots 2346 [ 0] 8 2 2347 [ 1] 8 2 2348 leaves roots roots in global numbering 2349 ( 0, 0) <- ( 0, 0) = 0 2350 ( 0, 1) <- ( 0, 1) = 1 2351 ( 0, 2) <- ( 1, 0) = 2 2352 ( 0, 3) <- ( 1, 1) = 3 2353 ( 0, 4) <- ( 0, 0) = 0 2354 ( 0, 5) <- ( 0, 1) = 1 2355 ( 0, 6) <- ( 1, 0) = 2 2356 ( 0, 7) <- ( 1, 1) = 3 2357 ( 1, 0) <- ( 0, 0) = 0 2358 ( 1, 1) <- ( 0, 1) = 1 2359 ( 1, 2) <- ( 1, 0) = 2 2360 ( 1, 3) <- ( 1, 1) = 3 2361 ( 1, 4) <- ( 0, 0) = 0 2362 ( 1, 5) <- ( 0, 1) = 1 2363 ( 1, 6) <- ( 1, 0) = 2 2364 ( 1, 7) <- ( 1, 1) = 3 2365 .ve 2366 2367 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode` 2368 @*/ 2369 PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf) 2370 { 2371 PetscInt i, s, nLeaves, nRoots; 2372 PetscInt *leafArrayOffsets; 2373 PetscInt *ilocal_new; 2374 PetscSFNode *iremote_new; 2375 PetscBool all_ilocal_null = PETSC_FALSE; 2376 PetscLayout glayout = NULL; 2377 PetscInt *gremote = NULL; 2378 PetscMPIInt rank, size; 2379 2380 PetscFunctionBegin; 2381 if (PetscDefined(USE_DEBUG)) { 2382 PetscSF dummy; /* just to have a PetscObject on comm for input validation */ 2383 2384 PetscCall(PetscSFCreate(comm, &dummy)); 2385 PetscValidLogicalCollectiveInt(dummy, nsfs, 2); 2386 
PetscValidPointer(sfs, 3);
2387 for (i = 0; i < nsfs; i++) {
2388 PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3);
2389 PetscCheckSameComm(dummy, 1, sfs[i], 3);
2390 }
2391 PetscValidLogicalCollectiveEnum(dummy, rootMode, 4);
2392 if (leafOffsets) PetscValidIntPointer(leafOffsets, 5);
2393 PetscValidPointer(newsf, 6);
2394 PetscCall(PetscSFDestroy(&dummy));
2395 }
2396 if (!nsfs) {
2397 PetscCall(PetscSFCreate(comm, newsf));
2398 PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
2399 PetscFunctionReturn(PETSC_SUCCESS);
2400 }
2401 PetscCallMPI(MPI_Comm_rank(comm, &rank));
2402 PetscCallMPI(MPI_Comm_size(comm, &size));
2403
2404 /* Calculate leaf array offsets */
2405 PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
2406 leafArrayOffsets[0] = 0;
2407 for (s = 0; s < nsfs; s++) {
2408 PetscInt nl;
2409
2410 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
2411 leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
2412 }
2413 nLeaves = leafArrayOffsets[nsfs];
2414
2415 /* Calculate number of roots */
2416 switch (rootMode) {
2417 case PETSCSF_CONCATENATE_ROOTMODE_SHARED: {
2418 PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
2419 if (PetscDefined(USE_DEBUG)) {
2420 for (s = 1; s < nsfs; s++) {
2421 PetscInt nr;
2422
2423 PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
2424 PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots);
2425 }
2426 }
2427 } break;
2428 case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: {
2429 /* Calculate also global layout in this case */
2430 PetscInt *nls;
2431 PetscLayout *lts;
2432 PetscInt **inds;
2433 PetscInt j;
2434 PetscInt rootOffset = 0;
2435
2436 PetscCall(PetscCalloc3(nsfs, &lts, nsfs, &nls, nsfs, &inds));
2437 PetscCall(PetscLayoutCreate(comm, &glayout));
2438 glayout->bs = 1;
2439 glayout->n = 0;
2440 glayout->N = 0;
2441 for (s = 0; s < nsfs; s++) {
2442 PetscCall(PetscSFGetGraphLayout(sfs[s], &lts[s], &nls[s], NULL, &inds[s]));
2443 glayout->n += lts[s]->n;
2444 glayout->N += lts[s]->N;
2445 }
2446 PetscCall(PetscLayoutSetUp(glayout));
2447 PetscCall(PetscMalloc1(nLeaves, &gremote));
2448 for (s = 0, j = 0; s < nsfs; s++) {
2449 for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset;
2450 rootOffset += lts[s]->N;
2451 PetscCall(PetscLayoutDestroy(&lts[s]));
2452 PetscCall(PetscFree(inds[s]));
2453 }
2454 PetscCall(PetscFree3(lts, nls, inds));
2455 nRoots = glayout->N;
2456 } break;
2457 case PETSCSF_CONCATENATE_ROOTMODE_LOCAL:
2458 /* nRoots calculated later in this case */
2459 break;
2460 default:
2461 SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode);
2462 }
2463
2464 if (!leafOffsets) {
2465 all_ilocal_null = PETSC_TRUE;
2466 for (s = 0; s < nsfs; s++) {
2467 const PetscInt *ilocal;
2468
2469 PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
2470 if (ilocal) {
2471 all_ilocal_null = PETSC_FALSE;
2472 break;
2473 }
2474 }
2475 PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
2476 }
2477
2478 /* Renumber and concatenate local leaves */
2479 ilocal_new = NULL;
2480 if (!all_ilocal_null) {
2481 PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
2482 for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
2483 for (s = 0; s < nsfs; s++) {
2484 const PetscInt *ilocal;
2485 PetscInt *ilocal_l = &ilocal_new[leafArrayOffsets[s]]; 2486 PetscInt i, nleaves_l; 2487 2488 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL)); 2489 for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s]; 2490 } 2491 } 2492 2493 /* Renumber and concatenate remote roots */ 2494 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) { 2495 PetscInt rootOffset = 0; 2496 2497 PetscCall(PetscMalloc1(nLeaves, &iremote_new)); 2498 for (i = 0; i < nLeaves; i++) { 2499 iremote_new[i].rank = -1; 2500 iremote_new[i].index = -1; 2501 } 2502 for (s = 0; s < nsfs; s++) { 2503 PetscInt i, nl, nr; 2504 PetscSF tmp_sf; 2505 const PetscSFNode *iremote; 2506 PetscSFNode *tmp_rootdata; 2507 PetscSFNode *tmp_leafdata = &iremote_new[leafArrayOffsets[s]]; 2508 2509 PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote)); 2510 PetscCall(PetscSFCreate(comm, &tmp_sf)); 2511 /* create helper SF with contiguous leaves */ 2512 PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 2513 PetscCall(PetscSFSetUp(tmp_sf)); 2514 PetscCall(PetscMalloc1(nr, &tmp_rootdata)); 2515 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) { 2516 for (i = 0; i < nr; i++) { 2517 tmp_rootdata[i].index = i + rootOffset; 2518 tmp_rootdata[i].rank = (PetscInt)rank; 2519 } 2520 rootOffset += nr; 2521 } else { 2522 for (i = 0; i < nr; i++) { 2523 tmp_rootdata[i].index = i; 2524 tmp_rootdata[i].rank = (PetscInt)rank; 2525 } 2526 } 2527 PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2528 PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2529 PetscCall(PetscSFDestroy(&tmp_sf)); 2530 PetscCall(PetscFree(tmp_rootdata)); 2531 } 2532 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above 2533 2534 /* Build the new SF */ 2535 PetscCall(PetscSFCreate(comm, newsf)); 2536 PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER)); 2537 } else { 2538 /* Build the new SF */ 2539 PetscCall(PetscSFCreate(comm, newsf)); 2540 PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote)); 2541 } 2542 PetscCall(PetscSFSetUp(*newsf)); 2543 PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view")); 2544 PetscCall(PetscLayoutDestroy(&glayout)); 2545 PetscCall(PetscFree(gremote)); 2546 PetscCall(PetscFree(leafArrayOffsets)); 2547 PetscFunctionReturn(PETSC_SUCCESS); 2548 } 2549
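/*
  Illustrative usage sketch (not part of the PETSc sources): reducing one PetscScalar per leaf into the roots with
  summation, using the Begin/End pattern of PetscSFReduceBegin()/PetscSFReduceEnd(). The variable names and the
  assumption of contiguous leaf storage (ilocal = NULL) are for this sketch only.

    PetscInt     nroots, nleaves;
    PetscScalar *rootdata, *leafdata;

    PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, NULL, NULL));
    PetscCall(PetscCalloc2(nroots, &rootdata, nleaves, &leafdata));
    // fill leafdata with the contributions to accumulate at the roots
    PetscCall(PetscSFReduceBegin(sf, MPIU_SCALAR, leafdata, rootdata, MPIU_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_SCALAR, leafdata, rootdata, MPIU_SUM));
    PetscCall(PetscFree2(rootdata, leafdata));
*/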