1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/hashseti.h> 3 #include <petsc/private/viewerimpl.h> 4 #include <petscctable.h> 5 6 #if defined(PETSC_HAVE_CUDA) 7 #include <cuda_runtime.h> 8 #endif 9 10 #if defined(PETSC_HAVE_HIP) 11 #include <hip/hip_runtime.h> 12 #endif 13 14 #if defined(PETSC_CLANG_STATIC_ANALYZER) 15 void PetscSFCheckGraphSet(PetscSF, int); 16 #else 17 #if defined(PETSC_USE_DEBUG) 18 #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME); 19 #else 20 #define PetscSFCheckGraphSet(sf, arg) \ 21 do { \ 22 } while (0) 23 #endif 24 #endif 25 26 const char *const PetscSFDuplicateOptions[] = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL}; 27 28 /*@ 29 PetscSFCreate - create a star forest communication context 30 31 Collective 32 33 Input Parameter: 34 . comm - communicator on which the star forest will operate 35 36 Output Parameter: 37 . sf - new star forest context 38 39 Options Database Keys: 40 . -sf_type type - value of type may be 41 .vb 42 basic -Use MPI persistent Isend/Irecv for communication (Default) 43 window -Use MPI-3 one-sided window for communication 44 neighbor -Use MPI-3 neighborhood collectives for communication 45 .ve 46 47 Level: intermediate 48 49 Note: 50 When one knows the communication graph is one of the predefined graph, such as `MPI_Alltoall()`, `MPI_Allgatherv()`, 51 `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special 52 `SF`s are optimized and they have better performance than general `SF`s. 
53 54 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()` 55 @*/ 56 PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf) 57 { 58 PetscSF b; 59 60 PetscFunctionBegin; 61 PetscValidPointer(sf, 2); 62 PetscCall(PetscSFInitializePackage()); 63 64 PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView)); 65 66 b->nroots = -1; 67 b->nleaves = -1; 68 b->minleaf = PETSC_MAX_INT; 69 b->maxleaf = PETSC_MIN_INT; 70 b->nranks = -1; 71 b->rankorder = PETSC_TRUE; 72 b->ingroup = MPI_GROUP_NULL; 73 b->outgroup = MPI_GROUP_NULL; 74 b->graphset = PETSC_FALSE; 75 #if defined(PETSC_HAVE_DEVICE) 76 b->use_gpu_aware_mpi = use_gpu_aware_mpi; 77 b->use_stream_aware_mpi = PETSC_FALSE; 78 b->unknown_input_stream = PETSC_FALSE; 79 #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 80 b->backend = PETSCSF_BACKEND_KOKKOS; 81 #elif defined(PETSC_HAVE_CUDA) 82 b->backend = PETSCSF_BACKEND_CUDA; 83 #elif defined(PETSC_HAVE_HIP) 84 b->backend = PETSCSF_BACKEND_HIP; 85 #endif 86 87 #if defined(PETSC_HAVE_NVSHMEM) 88 b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */ 89 b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */ 90 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL)); 91 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL)); 92 #endif 93 #endif 94 b->vscat.from_n = -1; 95 b->vscat.to_n = -1; 96 b->vscat.unit = MPIU_SCALAR; 97 *sf = b; 98 PetscFunctionReturn(0); 99 } 100 101 /*@ 102 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 103 104 Collective 105 106 Input Parameter: 107 . 
sf - star forest

  Level: advanced

.seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* Let the implementation release its own state first */
  PetscTryTypeMethod(sf, Reset);
  /* Return graph-describing fields to their "not set" sentinels (see PetscSFCreate()) */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  PetscCall(PetscFree(sf->mine_alloc));
  PetscCall(PetscFree(sf->remote_alloc));
  sf->nranks = -1;
  PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
  sf->degreeknown = PETSC_FALSE;
  PetscCall(PetscFree(sf->degree));
  if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
  if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
  /* NOTE(review): presumably clears the multi-SF's back-reference to us before destroying it -- confirm intent */
  if (sf->multi) sf->multi->multi = NULL;
  PetscCall(PetscSFDestroy(&sf->multi));
  PetscCall(PetscLayoutDestroy(&sf->map));

#if defined(PETSC_HAVE_DEVICE)
  /* Release the two device-side mirrors of rmine */
  for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
#endif

  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@C
  PetscSFSetType - Set the `PetscSF` communication implementation

  Collective on sf

  Input Parameters:
+ sf   - the `PetscSF` context
- type - a known method
.vb
    PETSCSFWINDOW - MPI-2/3 one-sided
    PETSCSFBASIC  - basic implementation using MPI-1 two-sided
.ve

  Options Database Key:
.
-sf_type <type> - Sets the method; for example basic or window use -help for a list of available methods (for instance, window, basic, neighbor)

  Level: intermediate

  Notes:
  See "include/petscsf.h" for available methods (for instance)

.seealso: `PetscSFType`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
{
  PetscBool match;
  PetscErrorCode (*r)(PetscSF);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidCharPointer(type, 2);

  /* No-op if the SF already has the requested type */
  PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
  if (match) PetscFunctionReturn(0);

  PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
  PetscCheck(r, PETSC_COMM_SELF, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
  /* Destroy the previous PetscSF implementation context */
  PetscTryTypeMethod(sf, Destroy);
  /* Wipe the old vtable before the new type's constructor fills it in */
  PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
  PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
  PetscCall((*r)(sf));
  PetscFunctionReturn(0);
}

/*@C
  PetscSFGetType - Get the `PetscSF` communication implementation

  Not Collective

  Input Parameter:
. sf - the `PetscSF` context

  Output Parameter:
. type - the `PetscSF` type name

  Level: intermediate

.seealso: `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidPointer(type, 2);
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}

/*@C
  PetscSFDestroy - destroy star forest

  Collective

  Input Parameter:
.
sf - address of star forest

  Level: intermediate

.seealso: `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf), PETSCSF_CLASSID, 1);
  /* Only the last reference actually tears the object down */
  if (--((PetscObject)(*sf))->refct > 0) {
    *sf = NULL;
    PetscFunctionReturn(0);
  }
  PetscCall(PetscSFReset(*sf));
  PetscTryTypeMethod((*sf), Destroy);
  PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
  /* A blocked unit was created with MPI_Type_dup()/contiguous and must be freed; bs == 1 uses the builtin MPIU_SCALAR */
  if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
  PetscCall(PetscHeaderDestroy(sf));
  PetscFunctionReturn(0);
}

/* Debug-only validation that every leaf's (rank, index, location) triple is in range.
   A no-op unless the graph is set and PETSc was configured with debugging. */
static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
{
  PetscInt           i, nleaves;
  PetscMPIInt        size;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;

  PetscFunctionBegin;
  if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
  PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
  for (i = 0; i < nleaves; i++) {
    const PetscInt rank   = iremote[i].rank;
    const PetscInt remote = iremote[i].index;
    const PetscInt leaf   = ilocal ? ilocal[i] : i; /* NULL ilocal means contiguous leaf storage */
    PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
    PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
    PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
  }
  PetscFunctionReturn(0);
}

/*@
  PetscSFSetUp - set up communication structures

  Collective

  Input Parameter:
.
sf - star forest communication object 272 273 Level: beginner 274 275 .seealso: `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()` 276 @*/ 277 PetscErrorCode PetscSFSetUp(PetscSF sf) 278 { 279 PetscFunctionBegin; 280 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 281 PetscSFCheckGraphSet(sf, 1); 282 if (sf->setupcalled) PetscFunctionReturn(0); 283 PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0)); 284 PetscCall(PetscSFCheckGraphValid_Private(sf)); 285 if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */ 286 PetscTryTypeMethod(sf, SetUp); 287 #if defined(PETSC_HAVE_CUDA) 288 if (sf->backend == PETSCSF_BACKEND_CUDA) { 289 sf->ops->Malloc = PetscSFMalloc_CUDA; 290 sf->ops->Free = PetscSFFree_CUDA; 291 } 292 #endif 293 #if defined(PETSC_HAVE_HIP) 294 if (sf->backend == PETSCSF_BACKEND_HIP) { 295 sf->ops->Malloc = PetscSFMalloc_HIP; 296 sf->ops->Free = PetscSFFree_HIP; 297 } 298 #endif 299 300 # 301 #if defined(PETSC_HAVE_KOKKOS) 302 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 303 sf->ops->Malloc = PetscSFMalloc_Kokkos; 304 sf->ops->Free = PetscSFFree_Kokkos; 305 } 306 #endif 307 PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0)); 308 sf->setupcalled = PETSC_TRUE; 309 PetscFunctionReturn(0); 310 } 311 312 /*@ 313 PetscSFSetFromOptions - set `PetscSF` options using the options database 314 315 Logically Collective 316 317 Input Parameter: 318 . sf - star forest 319 320 Options Database Keys: 321 + -sf_type - implementation type, see PetscSFSetType() 322 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 323 . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also 324 use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true). 325 If true, this option only works with -use_gpu_aware_mpi 1. 
. -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
                             If true, this option only works with -use_gpu_aware_mpi 1.

- -sf_backend cuda | hip | kokkos - Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos.
                             On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
                             the only available is kokkos.

  Level: intermediate

.seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType deft;
  char        type[256];
  PetscBool   flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscObjectOptionsBegin((PetscObject)sf);
  /* Default to the current type if one is set, otherwise basic */
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
  PetscCall(PetscSFSetType(sf, flg ? type : deft));
  PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
#if defined(PETSC_HAVE_DEVICE)
  {
    char      backendstr[32] = {0};
    PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
    /* Change the defaults set in PetscSFCreate() with command line options */
    PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
    PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
    PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
    /* Case-insensitive match of the requested backend name */
    PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
    PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
    PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
  #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
    if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
    else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
    else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
    else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
  #elif defined(PETSC_HAVE_KOKKOS)
    /* Kokkos-only builds accept nothing but "kokkos" */
    PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
  #endif
  }
#endif
  PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
  PetscOptionsEnd();
  PetscFunctionReturn(0);
}

/*@
  PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

  Logically Collective

  Input Parameters:
+ sf  - star forest
- flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic)

  Level: advanced

.seealso: `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveBool(sf, flg, 2);
  /* The multi-SF is built lazily; once it exists the ordering can no longer change */
  PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}

/*@C
  PetscSFSetGraph - Set a parallel star forest

  Collective

  Input Parameters:
+ sf         - star forest
. nroots     - number of root vertices on the current process (these are possible targets for other process to attach leaves)
. nleaves    - number of leaf vertices on the current process, each of these references a root on any process
. ilocal     - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
               during setup in debug mode)
. localmode  - copy mode for ilocal
. iremote    - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
               during setup in debug mode)
- remotemode - copy mode for iremote

  Level: intermediate

  Notes:
  Leaf indices in ilocal must be unique, otherwise an error occurs.

  Input arrays ilocal and iremote follow the PetscCopyMode semantics.
  In particular, if localmode/remotemode is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
  PETSc might modify the respective array;
  if `PETSC_USE_POINTER`, the user must delete the array after PetscSFDestroy().
  Only if `PETSC_COPY_VALUES` is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).

  Fortran Note:
  In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode.

  Developer Note:
  We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
  This also allows to compare leaf sets of two SFs easily.

.seealso: `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode)
{
  PetscBool unique, contiguous;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal, 4);
  if (nleaves > 0) PetscValidPointer(iremote, 6);
  PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
  PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
  /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
   * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
  PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
  PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    PetscCall(PetscSFReset(sf));
  }

  PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  /* With PETSC_COPY_VALUES, work on private copies so the caller's arrays stay intact */
  if (localmode == PETSC_COPY_VALUES && ilocal) {
    PetscInt *tlocal = NULL;

    PetscCall(PetscMalloc1(nleaves, &tlocal));
    PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
    ilocal = tlocal;
  }
  if (remotemode == PETSC_COPY_VALUES) {
    PetscSFNode *tremote = NULL;

    PetscCall(PetscMalloc1(nleaves, &tremote));
    PetscCall(PetscArraycpy(tremote, iremote, nleaves));
    iremote = tremote;
  }

  if (nleaves && ilocal) {
    PetscSFNode work;

    /* Sort leaves (keeping iremote paired) so we can detect duplicates/contiguity and read min/max directly */
    PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
    PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
    unique = PetscNot(unique); /* PetscSortedCheckDupsInt() reports "has dups"; we want "is unique" */
    PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
    sf->minleaf = ilocal[0];
    sf->maxleaf = ilocal[nleaves - 1];
    contiguous  = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
  } else {
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
    unique      = PETSC_TRUE;
    contiguous  = PETSC_TRUE;
  }

  /* Contiguous leaves are stored as ilocal == NULL; drop any array we would otherwise keep */
  if (contiguous) {
    if (localmode == PETSC_USE_POINTER) {
      ilocal = NULL;
    } else {
      PetscCall(PetscFree(ilocal));
    }
  }
  sf->mine = ilocal;
  if (localmode == PETSC_USE_POINTER) {
    sf->mine_alloc = NULL; /* caller retains ownership */
  } else {
    sf->mine_alloc = ilocal;
  }
  sf->remote = iremote;
  if (remotemode == PETSC_USE_POINTER) {
    sf->remote_alloc = NULL; /* caller retains ownership */
  } else {
    sf->remote_alloc = iremote;
  }
  PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
  PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern

  Collective
  Input Parameters:
+ sf      - The `PetscSF`
. map     - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`)
- pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`

  Level: intermediate

  Notes:
  It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector x and its layout is map.
  n and N are local and global sizes of x respectively.

  With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph that if one does Bcast on it, it will copy x to
  sequential vectors y on all ranks.

  With `PETSCSF_PATTERN_GATHER`, the routine creates a graph that if one does Bcast on it, it will copy x to a
  sequential vector y on rank 0.

  In above cases, entries of x are roots and entries of y are leaves.

  With `PETSCSF_PATTERN_ALLTOALL`, map is insignificant. Suppose NP is size of sf's communicator. The routine
  creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
  of rank j. Here 0 <= i,j < NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that it does
  not mean one can not send multiple items. One just needs to create a new MPI datatype for the multiple data
  items with `MPI_Type_contiguous` and use that as the <unit> argument in SF routines.

  In this case, roots and leaves are symmetric.

.seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
{
  MPI_Comm    comm;
  PetscInt    n, N, res[2];
  PetscMPIInt rank, size;
  PetscSFType type;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map, 2);
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* Build our own layout: each rank owns 'size' roots, NP*NP globally */
    PetscCall(PetscLayoutCreate(comm, &sf->map));
    PetscCall(PetscLayoutSetLocalSize(sf->map, size));
    PetscCall(PetscLayoutSetSize(sf->map, ((PetscInt)size) * size));
    PetscCall(PetscLayoutSetUp(sf->map));
  } else {
    PetscCall(PetscLayoutGetLocalSize(map, &n));
    PetscCall(PetscLayoutGetSize(map, &N));
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    PetscCall(PetscLayoutReference(map, &sf->map));
  }
  PetscCall(PetscSFSetType(sf, type));

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots  = n;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves in a gather */
    sf->nleaves = rank ? 0 : N;
    sf->nroots  = n;
    sf->nranks  = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves = size;
    sf->nroots  = size;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
  PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map

  Collective

  Input Parameter:
. sf - star forest to invert

  Output Parameter:
. isf - inverse of sf

  Level: advanced

  Notes:
  All roots must have degree 1.

  The local space may be a permutation, but cannot be sparse.
.seealso: `PetscSFType`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
{
  PetscMPIInt     rank;
  PetscInt        i, nroots, nleaves, maxlocal, count, *newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots, *leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscValidPointer(isf, 2);

  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
  maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
  PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
  /* Each leaf slot records its own (rank, index) so a Reduce tells every root who points at it */
  for (i = 0; i < maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* Roots start as (-1, -1) so untouched roots are recognizable afterwards */
  for (i = 0; i < nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  PetscCall(PetscSFReduceBegin(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));

  /* Check whether our leaves are sparse */
  for (i = 0, count = 0; i < nroots; i++)
    if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL;
  else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    PetscCall(PetscMalloc1(count, &newilocal));
    for (i = 0, count = 0; i < nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
  PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
  PetscCall(PetscFree2(roots, leaves));
  PetscFunctionReturn(0);
}

/*@
  PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph

  Collective

  Input Parameters:
+ sf  - communication object to duplicate
- opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)

  Output Parameter:
. newsf - new communication object

  Level: beginner

.seealso: `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
{
  PetscSFType  type;
  MPI_Datatype dtype = MPIU_SCALAR;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveEnum(sf, opt, 2);
  PetscValidPointer(newsf, 3);
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
  PetscCall(PetscSFGetType(sf, &type));
  if (type) PetscCall(PetscSFSetType(*newsf, type));
  (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag ealier since PetscSFSetGraph() below checks on this flag */
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf, 1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt           nroots, nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
      /* PETSC_COPY_VALUES guarantees the source arrays stay intact; the const cast is safe for that mode */
      PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
    } else {
      PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
    }
  }
  /* Since oldtype is committed, so is newtype, according to MPI */
  if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
  (*newsf)->vscat.bs     = sf->vscat.bs;
  (*newsf)->vscat.unit   = dtype;
  (*newsf)->vscat.to_n   = sf->vscat.to_n;
  (*newsf)->vscat.from_n = sf->vscat.from_n;
  /* Do not copy lsf. Build it on demand since it is rarely used */

#if defined(PETSC_HAVE_DEVICE)
  (*newsf)->backend              = sf->backend;
  (*newsf)->unknown_input_stream = sf->unknown_input_stream;
  (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(0);
}

/*@C
  PetscSFGetGraph - Get the graph specifying a parallel star forest

  Not Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ nroots  - number of root vertices on the current process (these are possible targets for other process to attach leaves)
. nleaves - number of leaf vertices on the current process, each of these references a root on any process
. ilocal  - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
- iremote - remote locations of root vertices for each leaf on the current process

  Level: intermediate

  Notes:
  We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet

  The returned ilocal/iremote might contain values in different order than the input ones in `PetscSFSetGraph()`,
  see its manpage for details.
  Fortran Notes:
  The returned iremote array is a copy and must be deallocated after use. Consequently, if you
  want to update the graph, you must call `PetscSFSetGraph()` after modifying the iremote array.

  To check for a NULL ilocal use
$ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then

.seealso: `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* Implementations (e.g. patterned SFs) may synthesize the graph on demand */
  if (sf->ops->GetGraph) {
    PetscCall((sf->ops->GetGraph)(sf, nroots, nleaves, ilocal, iremote));
  } else {
    if (nroots) *nroots = sf->nroots;
    if (nleaves) *nleaves = sf->nleaves;
    if (ilocal) *ilocal = sf->mine;
    if (iremote) *iremote = sf->remote;
  }
  PetscFunctionReturn(0);
}

/*@
  PetscSFGetLeafRange - Get the active leaf ranges

  Not Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
- maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.

  Level: developer

.seealso: `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}

/*@C
  PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database

  Collective on A

  Input Parameters:
+ A - the star forest
.
obj - Optional object that provides the prefix for the option names
- name - command line option

   Level: intermediate

.seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1);
  PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFView - view a star forest

   Collective

   Input Parameters:
+ sf - star forest
- viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`

   Level: beginner

.seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
{
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  PetscCheckSameComm(sf, 1, viewer, 2);
  if (sf->graphset) PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt    ii, i, j;

    PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
    PetscCall(PetscViewerASCIIPushTab(viewer));
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
        PetscCall(PetscViewerASCIIPopTab(viewer));
        PetscFunctionReturn(0);
      }
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
      /* Synchronized printing so per-rank output appears in rank order */
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n", rank, sf->nroots, sf->nleaves, sf->nranks));
      for (i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerGetFormat(viewer, &format));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks, *perm;
        /* Sort a copy of the rank list (with a permutation) so edges print in increasing rank order */
        PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
        PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
        for (i = 0; i < sf->nranks; i++) perm[i] = i;
        PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
        for (ii = 0; ii < sf->nranks; ii++) {
          i = perm[ii];
          PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
          for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
        }
        PetscCall(PetscFree2(tmpranks, perm));
      }
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
    }
    PetscCall(PetscViewerASCIIPopTab(viewer));
  }
  /* Give the implementation a chance to add type-specific output */
  PetscTryTypeMethod(sf, View, viewer);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Parameter:
. sf - star forest

   Output Parameters:
+ nranks - number of ranks referenced by local part
. ranks - array of ranks
.
roffset - offset in rmine/rremote for each rank (length nranks+1)
. rmine - concatenated array holding local indices referencing each remote rank
- rremote - concatenated array holding remote indices referenced for each remote rank

   Level: developer

.seealso: `PetscSF`, `PetscSFGetLeafRanks()`
@*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetRootRanks) {
    PetscCall((sf->ops->GetRootRanks)(sf, nranks, ranks, roffset, rmine, rremote));
  } else {
    /* The generic implementation: hand out arrays built by PetscSFSetUpRanks(); all outputs optional */
    if (nranks) *nranks = sf->nranks;
    if (ranks) *ranks = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine) *rmine = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

   Not Collective

   Input Parameter:
. sf - star forest

   Output Parameters:
+ niranks - number of leaf ranks referencing roots on this process
. iranks - array of ranks
. ioffset - offset in irootloc for each rank (length niranks+1)
- irootloc - concatenated array holding local indices of roots referenced by each leaf rank

   Level: developer

.seealso: `PetscSF`, `PetscSFGetRootRanks()`
@*/
PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetLeafRanks) {
    PetscCall((sf->ops->GetLeafRanks)(sf, niranks, iranks, ioffset, irootloc));
  } else {
    /* No generic fallback: leaf-rank information only exists for types that build a two-sided pattern */
    PetscSFType type;
    PetscCall(PetscSFGetType(sf, &type));
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
  }
  PetscFunctionReturn(0);
}

/* Linear membership test; n is expected to be small (the number of distinguished ranks) */
static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
{
  PetscInt i;
  for (i = 0; i < n; i++) {
    if (needle == list[i]) return PETSC_TRUE;
  }
  return PETSC_FALSE;
}

/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations.

   Collective

   Input Parameters:
+ sf - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
- dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: `PetscSF`, `PetscSFGetRootRanks()`
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
{
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size, groupsize, *groupranks;
  PetscInt          *rcount, *ranks;
  PetscInt           i, irank = -1, orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
  /* Count leaves per remote rank using a hash table keyed on the rank */
  PetscCall(PetscTableCreate(10, size, &table));
  for (i = 0; i < sf->nleaves; i++) {
    /* Log 1-based rank (PetscTable cannot store key 0) */
    PetscCall(PetscTableAdd(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
  }
  PetscCall(PetscTableGetCount(table, &sf->nranks));
  PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
  PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
  PetscCall(PetscTableGetHeadPosition(table, &pos));
  for (i = 0; i < sf->nranks; i++) {
    PetscCall(PetscTableGetNext(table, &pos, &ranks[i], &rcount[i]));
    ranks[i]--; /* Convert back to 0-based */
  }
  PetscCall(PetscTableDestroy(&table));

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group    group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
    PetscCall(PetscMalloc1(groupsize, &dgroupranks));
    PetscCall(PetscMalloc1(groupsize, &groupranks));
    for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
    /* Translate the distinguished ranks into ranks of sf's communicator */
    if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree(dgroupranks));
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
    for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i], groupsize, groupranks)) break;
    }
    for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank, tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  PetscCall(PetscFree(groupranks));
  /* Sort each partition independently so both halves are in increasing rank order */
  PetscCall(PetscSortIntWithArray(sf->ndranks, ranks, rcount));
  PetscCall(PetscSortIntWithArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
  sf->roffset[0] = 0;
  for (i = 0; i < sf->nranks; i++) {
    PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
    sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
    rcount[i]          = 0; /* Reused below as a per-rank fill cursor */
  }
  for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
    /* short circuit: consecutive leaves usually reference the same remote rank */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks (distinguished part first, then the rest) */
      PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->ndranks, sf->ranks, &irank));
      if (irank < 0) {
        PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %" PetscInt_FMT " in array", sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  PetscCall(PetscFree2(rcount, ranks));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Parameter:
. sf - star forest

   Output Parameters:
+ incoming - group of origin processes for incoming edges (leaves that reference my roots)
- outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
{
  MPI_Group group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt        i;
    const PetscInt *indegree;
    PetscMPIInt     rank, *outranks, *inranks;
    PetscSFNode    *remote;
    PetscSF         bgcount;

    /* Compute the number of incoming ranks: build an auxiliary SF with one local root
       and one leaf per referenced rank, then the root's degree counts incoming edges */
    PetscCall(PetscMalloc1(sf->nranks, &remote));
    for (i = 0; i < sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
    PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
    PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
    PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
    /* Enumerate the incoming ranks */
    PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
    for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
    /* Each leaf sends my rank to its root; the root gathers one entry per incoming edge */
    PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
    PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_incl(group, indegree[0], inranks, &sf->ingroup));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree2(inranks, outranks));
    PetscCall(PetscSFDestroy(&bgcount));
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* Outgoing ranks are exactly sf->ranks, already available locally */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
    PetscCallMPI(MPI_Group_free(&group));
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters

   Collective

   Input Parameter:
. sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Parameter:
. multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Note:
   In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.

.seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidPointer(multi, 2);
  if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
    *multi           = sf->multi;
    sf->multi->multi = sf->multi; /* the multi-SF of a multi-SF is itself */
    PetscFunctionReturn(0);
  }
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
    PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
    PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
    /* inoffset[] is the prefix sum of root degrees: each root's slice of the split root space */
    PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
    inoffset[0] = 0;
    for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
    for (i = 0; i < maxlocal; i++) outones[i] = 1;
    /* Each leaf atomically fetches its slot within its root's slice */
    PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
    PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
    for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) {                               /* Check that the expected number of increments occurred */
      for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
    }
    PetscCall(PetscMalloc1(sf->nleaves, &remote));
    for (i = 0; i < sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
    sf->multi->multi = sf->multi;
    PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
    if (sf->rankorder) { /* Sort the ranks */
      PetscMPIInt  rank;
      PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
      PetscSFNode *newremote;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
      for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
      PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
      for (i = 0; i < maxlocal; i++) outranks[i] = rank;
      PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
      PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i = 0; i < sf->nroots; i++) {
        PetscInt j;
        for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
        PetscCall(PetscSortIntWithArray(indegree[i], inranks + inoffset[i], tmpoffset));
        for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell each leaf its rank-sorted slot in the split root space */
      PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
      PetscCall(PetscMalloc1(sf->nleaves, &newremote));
      for (i = 0; i < sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
      PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
    }
    PetscCall(PetscFree3(inoffset, outones, outoffset));
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices

   Collective

   Input Parameters:
+ sf - original star forest
. nselected - number of selected roots on this process
- selected - indices of the selected roots on this process

   Output Parameter:
. esf - new star forest

   Level: advanced

   Note:
   To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
   be done by calling PetscSFGetGraph().

.seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf)
{
  PetscInt           i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
  const PetscInt    *ilocal;
  signed char       *rootdata, *leafdata, *leafmem;
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  MPI_Comm           comm;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  if (nselected) PetscValidIntPointer(selected, 3);
  PetscValidPointer(esf, 4);

  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));

  if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
    PetscBool
dups; 1281 PetscCall(PetscCheckDupsInt(nselected, selected, &dups)); 1282 PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups"); 1283 for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots); 1284 } 1285 1286 if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf); 1287 else { 1288 /* A generic version of creating embedded sf */ 1289 PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf)); 1290 maxlocal = maxleaf - minleaf + 1; 1291 PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem)); 1292 leafdata = leafmem - minleaf; 1293 /* Tag selected roots and bcast to leaves */ 1294 for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1; 1295 PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1296 PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1297 1298 /* Build esf with leaves that are still connected */ 1299 esf_nleaves = 0; 1300 for (i = 0; i < nleaves; i++) { 1301 j = ilocal ? ilocal[i] : i; 1302 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1303 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1304 */ 1305 esf_nleaves += (leafdata[j] ? 1 : 0); 1306 } 1307 PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal)); 1308 PetscCall(PetscMalloc1(esf_nleaves, &new_iremote)); 1309 for (i = n = 0; i < nleaves; i++) { 1310 j = ilocal ? 
ilocal[i] : i; 1311 if (leafdata[j]) { 1312 new_ilocal[n] = j; 1313 new_iremote[n].rank = iremote[i].rank; 1314 new_iremote[n].index = iremote[i].index; 1315 ++n; 1316 } 1317 } 1318 PetscCall(PetscSFCreate(comm, esf)); 1319 PetscCall(PetscSFSetFromOptions(*esf)); 1320 PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1321 PetscCall(PetscFree2(rootdata, leafmem)); 1322 } 1323 PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0)); 1324 PetscFunctionReturn(0); 1325 } 1326 1327 /*@C 1328 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1329 1330 Collective 1331 1332 Input Parameters: 1333 + sf - original star forest 1334 . nselected - number of selected leaves on this process 1335 - selected - indices of the selected leaves on this process 1336 1337 Output Parameter: 1338 . newsf - new star forest 1339 1340 Level: advanced 1341 1342 .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1343 @*/ 1344 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf) 1345 { 1346 const PetscSFNode *iremote; 1347 PetscSFNode *new_iremote; 1348 const PetscInt *ilocal; 1349 PetscInt i, nroots, *leaves, *new_ilocal; 1350 MPI_Comm comm; 1351 1352 PetscFunctionBegin; 1353 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1354 PetscSFCheckGraphSet(sf, 1); 1355 if (nselected) PetscValidIntPointer(selected, 3); 1356 PetscValidPointer(newsf, 4); 1357 1358 /* Uniq selected[] and put results in leaves[] */ 1359 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1360 PetscCall(PetscMalloc1(nselected, &leaves)); 1361 PetscCall(PetscArraycpy(leaves, selected, nselected)); 1362 PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves)); 1363 PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices 
%" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves); 1364 1365 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1366 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf); 1367 else { 1368 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote)); 1369 PetscCall(PetscMalloc1(nselected, &new_ilocal)); 1370 PetscCall(PetscMalloc1(nselected, &new_iremote)); 1371 for (i = 0; i < nselected; ++i) { 1372 const PetscInt l = leaves[i]; 1373 new_ilocal[i] = ilocal ? ilocal[l] : l; 1374 new_iremote[i].rank = iremote[l].rank; 1375 new_iremote[i].index = iremote[l].index; 1376 } 1377 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf)); 1378 PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1379 } 1380 PetscCall(PetscFree(leaves)); 1381 PetscFunctionReturn(0); 1382 } 1383 1384 /*@C 1385 PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()` 1386 1387 Collective on sf 1388 1389 Input Parameters: 1390 + sf - star forest on which to communicate 1391 . unit - data type associated with each node 1392 . rootdata - buffer to broadcast 1393 - op - operation to use for reduction 1394 1395 Output Parameter: 1396 . leafdata - buffer to be reduced with values from each leaf's respective root 1397 1398 Level: intermediate 1399 1400 Notes: 1401 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1402 are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1403 use `PetscSFBcastWithMemTypeBegin()` instead. 
1404 1405 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()` 1406 @*/ 1407 PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1408 { 1409 PetscMemType rootmtype, leafmtype; 1410 1411 PetscFunctionBegin; 1412 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1413 PetscCall(PetscSFSetUp(sf)); 1414 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1415 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1416 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1417 PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 1418 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1419 PetscFunctionReturn(0); 1420 } 1421 1422 /*@C 1423 PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to `PetscSFBcastEnd()` 1424 1425 Collective on sf 1426 1427 Input Parameters: 1428 + sf - star forest on which to communicate 1429 . unit - data type associated with each node 1430 . rootmtype - memory type of rootdata 1431 . rootdata - buffer to broadcast 1432 . leafmtype - memory type of leafdata 1433 - op - operation to use for reduction 1434 1435 Output Parameter: 1436 . 
leafdata - buffer to be reduced with values from each leaf's respective root 1437 1438 Level: intermediate 1439 1440 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()` 1441 @*/ 1442 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) 1443 { 1444 PetscFunctionBegin; 1445 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1446 PetscCall(PetscSFSetUp(sf)); 1447 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1448 PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 1449 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1450 PetscFunctionReturn(0); 1451 } 1452 1453 /*@C 1454 PetscSFBcastEnd - end a broadcast & reduce operation started with `PetscSFBcastBegin()` 1455 1456 Collective 1457 1458 Input Parameters: 1459 + sf - star forest 1460 . unit - data type 1461 . rootdata - buffer to broadcast 1462 - op - operation to use for reduction 1463 1464 Output Parameter: 1465 . 
leafdata - buffer to be reduced with values from each leaf's respective root 1466 1467 Level: intermediate 1468 1469 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()` 1470 @*/ 1471 PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1472 { 1473 PetscFunctionBegin; 1474 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1475 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1476 PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op); 1477 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1478 PetscFunctionReturn(0); 1479 } 1480 1481 /*@C 1482 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()` 1483 1484 Collective 1485 1486 Input Parameters: 1487 + sf - star forest 1488 . unit - data type 1489 . leafdata - values to reduce 1490 - op - reduction operation 1491 1492 Output Parameter: 1493 . rootdata - result of reduction of values from all leaves of each root 1494 1495 Level: intermediate 1496 1497 Notes: 1498 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1499 are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1500 use `PetscSFReduceWithMemTypeBegin()` instead. 
1501 1502 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()` 1503 @*/ 1504 PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1505 { 1506 PetscMemType rootmtype, leafmtype; 1507 1508 PetscFunctionBegin; 1509 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1510 PetscCall(PetscSFSetUp(sf)); 1511 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1512 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1513 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1514 PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1515 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1516 PetscFunctionReturn(0); 1517 } 1518 1519 /*@C 1520 PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()` 1521 1522 Collective 1523 1524 Input Parameters: 1525 + sf - star forest 1526 . unit - data type 1527 . leafmtype - memory type of leafdata 1528 . leafdata - values to reduce 1529 . rootmtype - memory type of rootdata 1530 - op - reduction operation 1531 1532 Output Parameter: 1533 . 
rootdata - result of reduction of values from all leaves of each root 1534 1535 Level: intermediate 1536 1537 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()` 1538 @*/ 1539 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) 1540 { 1541 PetscFunctionBegin; 1542 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1543 PetscCall(PetscSFSetUp(sf)); 1544 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1545 PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1546 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1547 PetscFunctionReturn(0); 1548 } 1549 1550 /*@C 1551 PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` 1552 1553 Collective 1554 1555 Input Parameters: 1556 + sf - star forest 1557 . unit - data type 1558 . leafdata - values to reduce 1559 - op - reduction operation 1560 1561 Output Parameter: 1562 . 
rootdata - result of reduction of values from all leaves of each root 1563 1564 Level: intermediate 1565 1566 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()` 1567 @*/ 1568 PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1569 { 1570 PetscFunctionBegin; 1571 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1572 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1573 PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op); 1574 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1575 PetscFunctionReturn(0); 1576 } 1577 1578 /*@C 1579 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, 1580 to be completed with `PetscSFFetchAndOpEnd()` 1581 1582 Collective 1583 1584 Input Parameters: 1585 + sf - star forest 1586 . unit - data type 1587 . leafdata - leaf values to use in reduction 1588 - op - operation to use for reduction 1589 1590 Output Parameters: 1591 + rootdata - root values to be updated, input state is seen by first process to perform an update 1592 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1593 1594 Level: advanced 1595 1596 Note: 1597 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1598 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1599 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1600 integers. 

.seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscMemType rootmtype, leafmtype, leafupdatemtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  /* Detect where each buffer lives; leafdata and leafupdate must be in the same memory space */
  PetscCall(PetscGetMemType(rootdata, &rootmtype));
  PetscCall(PetscGetMemType(leafdata, &leafmtype));
  PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
  PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
  PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by
   applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()`

   Collective

   Input Parameters:
+ sf - star forest
. unit - data type
. rootmtype - memory type of rootdata
. leafmtype - memory type of leafdata
. leafdata - leaf values to use in reduction
. leafupdatemtype - memory type of leafupdate
- op - operation to use for reduction

   Output Parameters:
+ rootdata - root values to be updated, input state is seen by first process to perform an update
- leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   See `PetscSFFetchAndOpBegin()` for more details.

.seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
  PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

   Collective

   Input Parameters:
+ sf - star forest
. unit - data type
. leafdata - leaf values to use in reduction
- op - operation to use for reduction

   Output Parameters:
+ rootdata - root values to be updated, input state is seen by first process to perform an update
- leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
  PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()`

   Collective

   Input Parameter:
. sf - star forest

   Output Parameter:
. degree - degree of each root vertex

   Level: advanced

   Note:
   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence no need to call `PetscFree()` on it.
1703 1704 .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()` 1705 @*/ 1706 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt **degree) 1707 { 1708 PetscFunctionBegin; 1709 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1710 PetscSFCheckGraphSet(sf, 1); 1711 PetscValidPointer(degree, 2); 1712 if (!sf->degreeknown) { 1713 PetscInt i, nroots = sf->nroots, maxlocal; 1714 PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1715 maxlocal = sf->maxleaf - sf->minleaf + 1; 1716 PetscCall(PetscMalloc1(nroots, &sf->degree)); 1717 PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1718 for (i = 0; i < nroots; i++) sf->degree[i] = 0; 1719 for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1; 1720 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 1721 } 1722 *degree = NULL; 1723 PetscFunctionReturn(0); 1724 } 1725 1726 /*@C 1727 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()` 1728 1729 Collective 1730 1731 Input Parameter: 1732 . sf - star forest 1733 1734 Output Parameter: 1735 . degree - degree of each root vertex 1736 1737 Level: developer 1738 1739 Note: 1740 The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence no need to call `PetscFree()` on it. 
1741 1742 .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()` 1743 @*/ 1744 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree) 1745 { 1746 PetscFunctionBegin; 1747 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1748 PetscSFCheckGraphSet(sf, 1); 1749 PetscValidPointer(degree, 2); 1750 if (!sf->degreeknown) { 1751 PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 1752 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 1753 PetscCall(PetscFree(sf->degreetmp)); 1754 sf->degreeknown = PETSC_TRUE; 1755 } 1756 *degree = sf->degree; 1757 PetscFunctionReturn(0); 1758 } 1759 1760 /*@C 1761 PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by `PetscSFGetMultiSF()`). 1762 Each multi-root is assigned index of the corresponding original root. 1763 1764 Collective 1765 1766 Input Parameters: 1767 + sf - star forest 1768 - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()` 1769 1770 Output Parameters: 1771 + nMultiRoots - (optional) number of multi-roots (roots of multi-SF) 1772 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots 1773 1774 Level: developer 1775 1776 Note: 1777 The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with `PetscFree()` when no longer needed. 
1778 1779 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()` 1780 @*/ 1781 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1782 { 1783 PetscSF msf; 1784 PetscInt i, j, k, nroots, nmroots; 1785 1786 PetscFunctionBegin; 1787 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1788 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL)); 1789 if (nroots) PetscValidIntPointer(degree, 2); 1790 if (nMultiRoots) PetscValidIntPointer(nMultiRoots, 3); 1791 PetscValidPointer(multiRootsOrigNumbering, 4); 1792 PetscCall(PetscSFGetMultiSF(sf, &msf)); 1793 PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL)); 1794 PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering)); 1795 for (i = 0, j = 0, k = 0; i < nroots; i++) { 1796 if (!degree[i]) continue; 1797 for (j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i; 1798 } 1799 PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail"); 1800 if (nMultiRoots) *nMultiRoots = nmroots; 1801 PetscFunctionReturn(0); 1802 } 1803 1804 /*@C 1805 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()` 1806 1807 Collective 1808 1809 Input Parameters: 1810 + sf - star forest 1811 . unit - data type 1812 - leafdata - leaf data to gather to roots 1813 1814 Output Parameter: 1815 . 
multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1816 1817 Level: intermediate 1818 1819 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()` 1820 @*/ 1821 PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) 1822 { 1823 PetscSF multi = NULL; 1824 1825 PetscFunctionBegin; 1826 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1827 PetscCall(PetscSFSetUp(sf)); 1828 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1829 PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 1830 PetscFunctionReturn(0); 1831 } 1832 1833 /*@C 1834 PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()` 1835 1836 Collective 1837 1838 Input Parameters: 1839 + sf - star forest 1840 . unit - data type 1841 - leafdata - leaf data to gather to roots 1842 1843 Output Parameter: 1844 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1845 1846 Level: intermediate 1847 1848 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()` 1849 @*/ 1850 PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) 1851 { 1852 PetscSF multi = NULL; 1853 1854 PetscFunctionBegin; 1855 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1856 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1857 PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 1858 PetscFunctionReturn(0); 1859 } 1860 1861 /*@C 1862 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()` 1863 1864 Collective 1865 1866 Input Parameters: 1867 + sf - star forest 1868 . unit - data type 1869 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1870 1871 Output Parameter: 1872 . 
leafdata - leaf data to be updated with personal data from each respective root

  Level: intermediate

.seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  /* A scatter is a Bcast with MPI_REPLACE on the multi-SF (one root slot per leaf) */
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
  PetscFunctionReturn(0);
}

/*@C
  PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`

  Collective

  Input Parameters:
+ sf - star forest
. unit - data type
- multirootdata - root buffer to send to each leaf, one unit of data per leaf

  Output Parameter:
. leafdata - leaf data to be updated with personal data from each respective root

  Level: intermediate

.seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
  PetscFunctionReturn(0);
}

/* Debug-build sanity check that no leaf index appears twice in the graph of sf; a no-op in optimized builds */
static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
{
  PetscInt        i, n, nleaves;
  const PetscInt *ilocal = NULL;
  PetscHSetI      seen;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) {
    PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
    PetscCall(PetscHSetICreate(&seen));
    for (i = 0; i < nleaves; i++) {
      const PetscInt leaf = ilocal ? ilocal[i] : i; /* ilocal == NULL means contiguous leaves 0..nleaves-1 */
      PetscCall(PetscHSetIAdd(seen, leaf));
    }
    /* If any leaf repeated, the set ends up smaller than the leaf count */
    PetscCall(PetscHSetIGetSize(seen, &n));
    PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
    PetscCall(PetscHSetIDestroy(&seen));
  }
  PetscFunctionReturn(0);
}

/*@
  PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view

  Input Parameters:
+ sfA - The first `PetscSF`
- sfB - The second `PetscSF`

  Output Parameters:
. sfBA - The composite `PetscSF`

  Level: developer

  Notes:
  Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots.

  sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
  nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
  Bcast on sfA, then a Bcast on sfB, on connected nodes.
.seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA, *remotePointsB;
  PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
  const PetscInt    *localPointsA, *localPointsB;
  PetscInt          *localPointsBA;
  PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
  PetscBool          denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sfA, 1);
  PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
  PetscSFCheckGraphSet(sfB, 2);
  PetscCheckSameComm(sfA, 1, sfB, 2);
  PetscValidPointer(sfBA, 3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
  /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size */
  /* numRootsB; otherwise, garbage will be broadcasted.                             */
  /* Example (comm size = 1):                                                       */
  /* sfA: 0 <- (0, 0)                                                               */
  /* sfB: 100 <- (0, 0)                                                             */
  /*      101 <- (0, 1)                                                             */
  /* Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target  */
  /* of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would  */
  /* receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on */
  /* remotePointsA; if not recast, point 101 would receive a garbage value.              */
  PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
  /* rank == -1 marks roots of sfB that no leaf of sfA feeds */
  for (i = 0; i < numRootsB; i++) {
    reorderedRemotePointsA[i].rank  = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  for (i = 0; i < numLeavesA; i++) {
    PetscInt localp = localPointsA ? localPointsA[i] : i;

    if (localp >= numRootsB) continue; /* leaves of sfA outside sfB's root space cannot reach any leaf of sfB */
    reorderedRemotePointsA[localp] = remotePointsA[i];
  }
  remotePointsA = reorderedRemotePointsA;
  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
  /* Initialize with an invalid rank so unreached leaves of sfB can be recognized after the Bcast */
  for (i = 0; i < maxleaf - minleaf + 1; i++) {
    leafdataB[i].rank  = -1;
    leafdataB[i].index = -1;
  }
  /* Push sfA's (rank, index) root locations through sfB down to sfB's leaves */
  PetscCall(PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE));
  PetscCall(PetscFree(reorderedRemotePointsA));

  denseB = (PetscBool)!localPointsB;
  for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    /* Every leaf of sfB is connected and leaves are contiguous: hand leafdataB to the new SF directly */
    localPointsBA  = NULL;
    remotePointsBA = leafdataB;
  } else {
    /* Compress out the unconnected leaves */
    PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
    PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
    for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l - minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
      localPointsBA[numLeavesBA]  = l;
      numLeavesBA++;
    }
    PetscCall(PetscFree(leafdataB));
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
  PetscFunctionReturn(0);
}

/*@
  PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one

  Input Parameters:
+ sfA - The first `PetscSF`
- sfB - The second `PetscSF`

  Output Parameters:
. sfBA - The composite `PetscSF`.

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
  second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

  sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
  roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
  a Reduce on sfB, on connected roots.
.seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA, *remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA, *localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
  MPI_Op             op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sfA, 1);
  PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
  PetscSFCheckGraphSet(sfB, 2);
  PetscCheckSameComm(sfA, 1, sfB, 2);
  PetscValidPointer(sfBA, 3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
  if (iswin) op = MPI_REPLACE;
#endif

  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
  /* Scatter sfA's remote root locations into sfB's (shifted) leaf space; rank == -1 marks leaves of sfB not fed by sfA */
  for (i = 0; i < maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank  = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i = 0; i < numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    for (i = 0; i < numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
  PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
  for (i = 0; i < numRootsB; i++) {
    remotePointsBA[i].rank  = -1;
    remotePointsBA[i].index = -1;
  }

  /* Pull the (rank, index) pairs from sfB's leaves back to its roots */
  PetscCall(PetscSFReduceBegin(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op));
  PetscCall(PetscSFReduceEnd(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op));
  PetscCall(PetscFree(reorderedRemotePointsA));
  /* Compact away roots of sfB that received nothing (rank still -1); in-place since numLeavesBA <= i */
  for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA]        = i;
    numLeavesBA++;
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
  PetscFunctionReturn(0);
}

/*
  PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`

  Input Parameters:
. sf - The global `PetscSF`

  Output Parameters:
. out - The local `PetscSF`

.seealso: `PetscSF`, `PetscSFCreate()`
 */
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
{
  MPI_Comm           comm;
  PetscMPIInt        myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
  PetscSFNode       *liremote;
  PetscSF            lsf;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
  else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
    PetscCallMPI(MPI_Comm_rank(comm, &myrank));

    /* Find out local edges and build a local SF */
    PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
    /* First pass: count edges whose root lives on this rank */
    for (i = lnleaves = 0; i < nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) lnleaves++;
    }
    PetscCall(PetscMalloc1(lnleaves, &lilocal));
    PetscCall(PetscMalloc1(lnleaves, &liremote));

    /* Second pass: copy the local edges, remapping the owner rank to 0 on the self communicator */
    for (i = j = 0; i < nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
    PetscCall(PetscSFSetFromOptions(lsf));
    PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(lsf));
    *out = lsf;
  }
  PetscFunctionReturn(0);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
{
  PetscMemType rootmtype, leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscCall(PetscGetMemType(rootdata, &rootmtype));
  PetscCall(PetscGetMemType(leafdata, &leafmtype));
  PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
  PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@
  PetscSFConcatenate - concatenate multiple `PetscSF` into one

  Input Parameters:
+ comm - the communicator
. nsfs - the number of input `PetscSF`
. sfs - the array of input `PetscSF`
. shareRoots - the flag whether roots of input `PetscSF` are taken as shared (`PETSC_TRUE`), or separate and concatenated (`PETSC_FALSE`)
- leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or NULL for contiguous storage

  Output Parameters:
. newsf - The resulting `PetscSF`

  Level: developer

  Notes:
  The communicator of all SFs in sfs must be comm.

  The offsets in leafOffsets are added to the original leaf indices.
  If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well.
  In this case, NULL is also passed as ilocal to the resulting SF.

  If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs.
  In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs).

.seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscBool shareRoots, PetscInt leafOffsets[], PetscSF *newsf)
{
  PetscInt     i, s, nLeaves, nRoots;
  PetscInt    *leafArrayOffsets;
  PetscInt    *ilocal_new;
  PetscSFNode *iremote_new;
  PetscInt    *rootOffsets;
  PetscBool    all_ilocal_null = PETSC_FALSE;
  PetscMPIInt  rank;

  PetscFunctionBegin;
  {
    PetscSF dummy; /* just to have a PetscObject on comm for input validation */

    PetscCall(PetscSFCreate(comm, &dummy));
    PetscValidLogicalCollectiveInt(dummy, nsfs, 2);
    PetscValidPointer(sfs, 3);
    for (i = 0; i < nsfs; i++) {
      PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3);
      PetscCheckSameComm(dummy, 1, sfs[i], 3);
    }
    PetscValidLogicalCollectiveBool(dummy, shareRoots, 4);
    if (leafOffsets) PetscValidIntPointer(leafOffsets, 5);
    PetscValidPointer(newsf, 6);
    PetscCall(PetscSFDestroy(&dummy));
  }
  if (!nsfs) {
    /* No inputs: the concatenation is an empty SF */
    PetscCall(PetscSFCreate(comm, newsf));
    PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscFunctionReturn(0);
  }
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* Root offsets: all zero (from PetscCalloc1) when roots are shared, prefix sums of the root counts otherwise */
  PetscCall(PetscCalloc1(nsfs + 1, &rootOffsets));
  if (shareRoots) {
    PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
    if (PetscDefined(USE_DEBUG)) {
      for (s = 1; s < nsfs; s++) {
        PetscInt nr;

        PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
        PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "shareRoots = PETSC_TRUE but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", s, nr, nRoots);
      }
    }
  } else {
    for (s = 0; s < nsfs; s++) {
      PetscInt nr;

      PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
      rootOffsets[s + 1] = rootOffsets[s] + nr;
    }
    nRoots = rootOffsets[nsfs];
  }

  /* Calculate leaf array offsets and automatic root offsets */
  PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
  leafArrayOffsets[0] = 0;
  for (s = 0; s < nsfs; s++) {
    PetscInt nl;

    PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
    leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
  }
  nLeaves = leafArrayOffsets[nsfs];

  /* leafOffsets == NULL is only legal when every input SF has contiguous leaves (ilocal == NULL) */
  if (!leafOffsets) {
    all_ilocal_null = PETSC_TRUE;
    for (s = 0; s < nsfs; s++) {
      const PetscInt *ilocal;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
      if (ilocal) {
        all_ilocal_null = PETSC_FALSE;
        break;
      }
    }
    PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
  }

  /* Renumber and concatenate local leaves */
  ilocal_new = NULL;
  if (!all_ilocal_null) {
    PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
    for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
    for (s = 0; s < nsfs; s++) {
      const PetscInt *ilocal;
      PetscInt       *ilocal_l = &ilocal_new[leafArrayOffsets[s]];
      PetscInt        i, nleaves_l;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
      /* Shift each input SF's leaf indices by the user-provided offset */
      for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
    }
  }

  /* Renumber and concatenate remote roots */
  PetscCall(PetscMalloc1(nLeaves, &iremote_new));
  for (i = 0; i < nLeaves; i++) {
    iremote_new[i].rank  = -1;
    iremote_new[i].index = -1;
  }
  for (s = 0; s < nsfs; s++) {
    PetscInt           i, nl, nr;
    PetscSF            tmp_sf;
    const PetscSFNode *iremote;
    PetscSFNode       *tmp_rootdata;
    PetscSFNode       *tmp_leafdata = &iremote_new[leafArrayOffsets[s]]; /* write directly into this SF's slice */

    PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
    PetscCall(PetscSFCreate(comm, &tmp_sf));
    /* create helper SF with contiguous leaves */
    PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
    PetscCall(PetscSFSetUp(tmp_sf));
    /* Broadcast each root's shifted (rank, index) identity to the leaves of this input SF */
    PetscCall(PetscMalloc1(nr, &tmp_rootdata));
    for (i = 0; i < nr; i++) {
      tmp_rootdata[i].index = i + rootOffsets[s];
      tmp_rootdata[i].rank  = (PetscInt)rank;
    }
    PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
    PetscCall(PetscSFDestroy(&tmp_sf));
    PetscCall(PetscFree(tmp_rootdata));
  }

  /* Build the new SF */
  PetscCall(PetscSFCreate(comm, newsf));
  PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetUp(*newsf));
  PetscCall(PetscFree(rootOffsets));
  PetscCall(PetscFree(leafArrayOffsets));
  PetscFunctionReturn(0);
}