1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/hashseti.h> 3 #include <petsc/private/viewerimpl.h> 4 #include <petscctable.h> 5 6 #if defined(PETSC_HAVE_CUDA) 7 #include <cuda_runtime.h> 8 #endif 9 10 #if defined(PETSC_HAVE_HIP) 11 #include <hip/hip_runtime.h> 12 #endif 13 14 #if defined(PETSC_CLANG_STATIC_ANALYZER) 15 void PetscSFCheckGraphSet(PetscSF, int); 16 #else 17 #if defined(PETSC_USE_DEBUG) 18 #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME); 19 #else 20 #define PetscSFCheckGraphSet(sf, arg) \ 21 do { \ 22 } while (0) 23 #endif 24 #endif 25 26 const char *const PetscSFDuplicateOptions[] = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL}; 27 28 /*@ 29 PetscSFCreate - create a star forest communication context 30 31 Collective 32 33 Input Parameter: 34 . comm - communicator on which the star forest will operate 35 36 Output Parameter: 37 . sf - new star forest context 38 39 Options Database Keys: 40 + -sf_type basic -Use MPI persistent Isend/Irecv for communication (Default) 41 . -sf_type window -Use MPI-3 one-sided window for communication 42 - -sf_type neighbor -Use MPI-3 neighborhood collectives for communication 43 44 Level: intermediate 45 46 Notes: 47 When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv, 48 MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special 49 SFs are optimized and they have better performance than general SFs. 
50 51 .seealso: `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()` 52 @*/ 53 PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf) 54 { 55 PetscSF b; 56 57 PetscFunctionBegin; 58 PetscValidPointer(sf, 2); 59 PetscCall(PetscSFInitializePackage()); 60 61 PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView)); 62 63 b->nroots = -1; 64 b->nleaves = -1; 65 b->minleaf = PETSC_MAX_INT; 66 b->maxleaf = PETSC_MIN_INT; 67 b->nranks = -1; 68 b->rankorder = PETSC_TRUE; 69 b->ingroup = MPI_GROUP_NULL; 70 b->outgroup = MPI_GROUP_NULL; 71 b->graphset = PETSC_FALSE; 72 #if defined(PETSC_HAVE_DEVICE) 73 b->use_gpu_aware_mpi = use_gpu_aware_mpi; 74 b->use_stream_aware_mpi = PETSC_FALSE; 75 b->unknown_input_stream = PETSC_FALSE; 76 #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 77 b->backend = PETSCSF_BACKEND_KOKKOS; 78 #elif defined(PETSC_HAVE_CUDA) 79 b->backend = PETSCSF_BACKEND_CUDA; 80 #elif defined(PETSC_HAVE_HIP) 81 b->backend = PETSCSF_BACKEND_HIP; 82 #endif 83 84 #if defined(PETSC_HAVE_NVSHMEM) 85 b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */ 86 b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */ 87 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL)); 88 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL)); 89 #endif 90 #endif 91 b->vscat.from_n = -1; 92 b->vscat.to_n = -1; 93 b->vscat.unit = MPIU_SCALAR; 94 *sf = b; 95 PetscFunctionReturn(0); 96 } 97 98 /*@ 99 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 100 101 Collective 102 103 Input Parameter: 104 . 
 sf - star forest

   Level: advanced

.seealso: `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscTryTypeMethod(sf, Reset); /* let the implementation release its own state first */
  /* Return graph fields to their "not set" sentinels */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  PetscCall(PetscFree(sf->mine_alloc));
  PetscCall(PetscFree(sf->remote_alloc));
  sf->nranks = -1;
  PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
  sf->degreeknown = PETSC_FALSE;
  PetscCall(PetscFree(sf->degree));
  if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
  if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
  /* sf->multi->multi points back at sf->multi; break the cycle before destroying */
  if (sf->multi) sf->multi->multi = NULL;
  PetscCall(PetscSFDestroy(&sf->multi));
  PetscCall(PetscLayoutDestroy(&sf->map));

#if defined(PETSC_HAVE_DEVICE)
  /* free the device-side copies of rmine (one per memtype slot) */
  for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
#endif

  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFSetType - Set the PetscSF communication implementation

   Collective on PetscSF

   Input Parameters:
+  sf - the PetscSF context
-  type - a known method

   Options Database Key:
.
 -sf_type <type> - Sets the method; use -help for a list
   of available methods (for instance, window, basic, neighbor)

   Notes:
   See "include/petscsf.h" for available methods (for instance)
+    PETSCSFWINDOW - MPI-2/3 one-sided
-    PETSCSFBASIC  - basic implementation using MPI-1 two-sided

   Level: intermediate

.seealso: `PetscSFType`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
{
  PetscBool match;
  PetscErrorCode (*r)(PetscSF);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidCharPointer(type, 2);

  /* No-op when the sf already has the requested type */
  PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
  if (match) PetscFunctionReturn(0);

  PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
  PetscCheck(r, PETSC_COMM_SELF, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
  /* Destroy the previous PetscSF implementation context */
  PetscTryTypeMethod(sf, Destroy);
  PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
  PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
  PetscCall((*r)(sf)); /* call the type's constructor to fill sf->ops */
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetType - Get the PetscSF communication implementation

   Not Collective

   Input Parameter:
.  sf - the PetscSF context

   Output Parameter:
.  type - the PetscSF type name

   Level: intermediate

.seealso: `PetscSFSetType()`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidPointer(type, 2);
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFDestroy - destroy star forest

   Collective

   Input Parameter:
.
 sf - address of star forest

   Level: intermediate

.seealso: `PetscSFCreate()`, `PetscSFReset()`
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf), PETSCSF_CLASSID, 1);
  if (--((PetscObject)(*sf))->refct > 0) { /* other holders remain; only drop this reference */
    *sf = NULL;
    PetscFunctionReturn(0);
  }
  PetscCall(PetscSFReset(*sf));
  PetscTryTypeMethod((*sf), Destroy);
  PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
  /* bs > 1 means vscat.unit was created with MPI_Type_dup/contiguous and must be freed */
  if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
  PetscCall(PetscHeaderDestroy(sf));
  PetscFunctionReturn(0);
}

/* Debug-build validation of the set graph: every leaf's remote rank must lie in
   [0,size) and all remote/local indices must be nonnegative. No-op otherwise. */
static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
{
  PetscInt           i, nleaves;
  PetscMPIInt        size;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;

  PetscFunctionBegin;
  if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
  PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
  for (i = 0; i < nleaves; i++) {
    const PetscInt rank   = iremote[i].rank;
    const PetscInt remote = iremote[i].index;
    const PetscInt leaf   = ilocal ? ilocal[i] : i; /* NULL ilocal means contiguous leaves */
    PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
    PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
    PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetUp - set up communication structures

   Collective

   Input Parameter:
.
sf - star forest communication object 268 269 Level: beginner 270 271 .seealso: `PetscSFSetFromOptions()`, `PetscSFSetType()` 272 @*/ 273 PetscErrorCode PetscSFSetUp(PetscSF sf) 274 { 275 PetscFunctionBegin; 276 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 277 PetscSFCheckGraphSet(sf, 1); 278 if (sf->setupcalled) PetscFunctionReturn(0); 279 PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0)); 280 PetscCall(PetscSFCheckGraphValid_Private(sf)); 281 if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */ 282 PetscTryTypeMethod(sf, SetUp); 283 #if defined(PETSC_HAVE_CUDA) 284 if (sf->backend == PETSCSF_BACKEND_CUDA) { 285 sf->ops->Malloc = PetscSFMalloc_CUDA; 286 sf->ops->Free = PetscSFFree_CUDA; 287 } 288 #endif 289 #if defined(PETSC_HAVE_HIP) 290 if (sf->backend == PETSCSF_BACKEND_HIP) { 291 sf->ops->Malloc = PetscSFMalloc_HIP; 292 sf->ops->Free = PetscSFFree_HIP; 293 } 294 #endif 295 296 # 297 #if defined(PETSC_HAVE_KOKKOS) 298 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 299 sf->ops->Malloc = PetscSFMalloc_Kokkos; 300 sf->ops->Free = PetscSFFree_Kokkos; 301 } 302 #endif 303 PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0)); 304 sf->setupcalled = PETSC_TRUE; 305 PetscFunctionReturn(0); 306 } 307 308 /*@ 309 PetscSFSetFromOptions - set PetscSF options using the options database 310 311 Logically Collective 312 313 Input Parameter: 314 . sf - star forest 315 316 Options Database Keys: 317 + -sf_type - implementation type, see PetscSFSetType() 318 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 319 . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also 320 use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true). 321 If true, this option only works with -use_gpu_aware_mpi 1. 322 . 
 -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
                            If true, this option only works with -use_gpu_aware_mpi 1.

-  -sf_backend cuda | hip | kokkos - Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos.
                                     On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
                                     the only available is kokkos.

   Level: intermediate
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType deft;
  char        type[256];
  PetscBool   flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscObjectOptionsBegin((PetscObject)sf);
  /* default type: keep the current one if set, otherwise basic */
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
  PetscCall(PetscSFSetType(sf, flg ? type : deft));
  PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
#if defined(PETSC_HAVE_DEVICE)
  {
    char      backendstr[32] = {0};
    PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
    /* Change the defaults set in PetscSFCreate() with command line options */
    PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
    PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
    PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
    /* case-insensitive match of the requested backend name */
    PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
    PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
    PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
    if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
    else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
    else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
    else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
#elif defined(PETSC_HAVE_KOKKOS)
    /* Kokkos is the only available backend here; accept only that name (or nothing) */
    PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
#endif
  }
#endif
  PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
  PetscOptionsEnd();
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

   Logically Collective

   Input Parameters:
+  sf - star forest
-  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

   Level: advanced

.seealso: `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveBool(sf, flg, 2);
  /* the multi-SF is built lazily; once it exists the ordering is baked in */
  PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Parameters:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
            during setup in debug mode)
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
             during setup in debug mode)
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
   Leaf indices in ilocal must be unique, otherwise an error occurs.

   Input arrays ilocal and iremote follow the PetscCopyMode semantics.
   In particular, if localmode/remotemode is PETSC_OWN_POINTER or PETSC_USE_POINTER,
   PETSc might modify the respective array;
   if PETSC_USE_POINTER, the user must delete the array after PetscSFDestroy().
   Only if PETSC_COPY_VALUES is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).

   Fortran Notes:
   In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode.

   Developer Notes:
   We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
   This also allows to compare leaf sets of two SFs easily.

.seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode)
{
  PetscBool unique, contiguous;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal, 4);
  if (nleaves > 0) PetscValidPointer(iremote, 6);
  PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
  PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
  /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
   * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
  PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
  PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    PetscCall(PetscSFReset(sf));
  }

  PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  /* With PETSC_COPY_VALUES, work on private copies so the caller's arrays stay intact */
  if (localmode == PETSC_COPY_VALUES && ilocal) {
    PetscInt *tlocal = NULL;

    PetscCall(PetscMalloc1(nleaves, &tlocal));
    PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
    ilocal = tlocal;
  }
  if (remotemode == PETSC_COPY_VALUES) {
    PetscSFNode *tremote = NULL;

    PetscCall(PetscMalloc1(nleaves, &tremote));
    PetscCall(PetscArraycpy(tremote, iremote, nleaves));
    iremote = tremote;
  }

  if (nleaves && ilocal) {
    PetscSFNode work;

    /* Sort leaves (keeping iremote paired) so we can detect duplicates and contiguity */
    PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
    PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
    unique = PetscNot(unique); /* PetscSortedCheckDupsInt() returns "has duplicates" */
    PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
    sf->minleaf = ilocal[0];
    sf->maxleaf = ilocal[nleaves - 1];
    contiguous  = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
  } else {
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
    unique      = PETSC_TRUE;
    contiguous  = PETSC_TRUE;
  }

  /* Contiguous leaves are represented by a NULL ilocal; drop any explicit array */
  if (contiguous) {
    if (localmode == PETSC_USE_POINTER) {
      ilocal = NULL;
    } else {
      PetscCall(PetscFree(ilocal));
    }
  }
  sf->mine = ilocal;
  if (localmode == PETSC_USE_POINTER) {
    sf->mine_alloc = NULL; /* not owned; caller frees after PetscSFDestroy() */
  } else {
    sf->mine_alloc = ilocal;
  }
  sf->remote = iremote;
  if (remotemode == PETSC_USE_POINTER) {
    sf->remote_alloc = NULL;
  } else {
    sf->remote_alloc = iremote;
  }
  PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

   Collective

   Input Parameters:
+  sf - The PetscSF
.  map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
-  pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

   Notes:
   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
   n and N are local and global sizes of x respectively.

   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
   sequential vectors y on all ranks.

   With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
   sequential vector y on rank 0.

   In above cases, entries of x are roots and entries of y are leaves.

   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
   creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
   of rank j. Here 0 <= i,j < NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
   not mean one can not send multiple items. One just needs to create a new MPI datatype for the multiple data
   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

   In this case, roots and leaves are symmetric.

   Level: intermediate
@*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
{
  MPI_Comm    comm;
  PetscInt    n, N, res[2];
  PetscMPIInt rank, size;
  PetscSFType type;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map, 2);
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* build a size-per-rank layout internally; the user's map is ignored for alltoall */
    PetscCall(PetscLayoutCreate(comm, &sf->map));
    PetscCall(PetscLayoutSetLocalSize(sf->map, size));
    PetscCall(PetscLayoutSetSize(sf->map, ((PetscInt)size) * size));
    PetscCall(PetscLayoutSetUp(sf->map));
  } else {
    PetscCall(PetscLayoutGetLocalSize(map, &n));
    PetscCall(PetscLayoutGetSize(map, &N));
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    PetscCall(PetscLayoutReference(map, &sf->map));
  }
  PetscCall(PetscSFSetType(sf, type));

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
  */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots  = n;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* only rank 0 has leaves in a gather */
    sf->nleaves = rank ? 0 : N;
    sf->nroots  = n;
    sf->nranks  = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves = size;
    sf->nroots  = size;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

   Collective

   Input Parameter:
.  sf - star forest to invert

   Output Parameter:
.  isf - inverse of sf

   Level: advanced

   Notes:
   All roots must have degree 1.

   The local space may be a permutation, but cannot be sparse.

.seealso: `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
{
  PetscMPIInt     rank;
  PetscInt        i, nroots, nleaves, maxlocal, count, *newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots, *leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscValidPointer(isf, 2);

  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
  maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
  PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
  /* each leaf slot advertises its own (rank,index) ... */
  for (i = 0; i < maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* ... and roots start as "unreferenced" sentinels */
  for (i = 0; i < nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  /* push each leaf's identity to the root it references; roots then hold the inverse edges */
  PetscCall(PetscSFReduceBegin(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));

  /* Check whether our leaves are sparse */
  for (i = 0, count = 0; i < nroots; i++)
    if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL;
  else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    PetscCall(PetscMalloc1(count, &newilocal));
    for (i = 0, count = 0; i < nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
  PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
  PetscCall(PetscFree2(roots, leaves));
  PetscFunctionReturn(0);
}

/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Parameters:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Parameter:
.  newsf - new communication object

   Level: beginner

.seealso: `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
{
  PetscSFType  type;
  MPI_Datatype dtype = MPIU_SCALAR;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveEnum(sf, opt, 2);
  PetscValidPointer(newsf, 3);
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
  PetscCall(PetscSFGetType(sf, &type));
  if (type) PetscCall(PetscSFSetType(*newsf, type));
  (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf, 1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt           nroots, nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
      /* const casts are safe: PETSC_COPY_VALUES leaves the input arrays untouched */
      PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
    } else {
      PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
    }
  }
  /* Since oldtype is committed, so is newtype, according to MPI */
  if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
  (*newsf)->vscat.bs     = sf->vscat.bs;
  (*newsf)->vscat.unit   = dtype;
  (*newsf)->vscat.to_n   = sf->vscat.to_n;
  (*newsf)->vscat.from_n = sf->vscat.from_n;
  /* Do not copy lsf. Build it on demand since it is rarely used */

#if defined(PETSC_HAVE_DEVICE)
  (*newsf)->backend              = sf->backend;
  (*newsf)->unknown_input_stream = sf->unknown_input_stream;
  (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGraph - Get the graph specifying a parallel star forest

   Not Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
-  iremote - remote locations of root vertices for each leaf on the current process

   Notes:
   We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet

   The returned ilocal/iremote might contain values in different order than the input ones in PetscSFSetGraph(),
   see its manpage for details.

   Fortran Notes:
   The returned iremote array is a copy and must be deallocated after use.
Consequently, if you 759 want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array. 760 761 To check for a NULL ilocal use 762 $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then 763 764 Level: intermediate 765 766 .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()` 767 @*/ 768 PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote) 769 { 770 PetscFunctionBegin; 771 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 772 if (sf->ops->GetGraph) { 773 PetscCall((sf->ops->GetGraph)(sf, nroots, nleaves, ilocal, iremote)); 774 } else { 775 if (nroots) *nroots = sf->nroots; 776 if (nleaves) *nleaves = sf->nleaves; 777 if (ilocal) *ilocal = sf->mine; 778 if (iremote) *iremote = sf->remote; 779 } 780 PetscFunctionReturn(0); 781 } 782 783 /*@ 784 PetscSFGetLeafRange - Get the active leaf ranges 785 786 Not Collective 787 788 Input Parameter: 789 . sf - star forest 790 791 Output Parameters: 792 + minleaf - minimum active leaf on this process. Return 0 if there are no leaves. 793 - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves. 794 795 Level: developer 796 797 .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 798 @*/ 799 PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf) 800 { 801 PetscFunctionBegin; 802 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 803 PetscSFCheckGraphSet(sf, 1); 804 if (minleaf) *minleaf = sf->minleaf; 805 if (maxleaf) *maxleaf = sf->maxleaf; 806 PetscFunctionReturn(0); 807 } 808 809 /*@C 810 PetscSFViewFromOptions - View from Options 811 812 Collective on PetscSF 813 814 Input Parameters: 815 + A - the star forest 816 . 
 obj - Optional object
-  name - command line option

   Level: intermediate
.seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1);
  PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFView - view a star forest

   Collective

   Input Parameters:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: `PetscSFCreate()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
{
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  PetscCheckSameComm(sf, 1, viewer, 2);
  if (sf->graphset) PetscCall(PetscSFSetUp(sf)); /* ranks/roffset/rmine/rremote needed below */
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt    ii, i, j;

    PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
    PetscCall(PetscViewerASCIIPushTab(viewer));
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
        PetscCall(PetscViewerASCIIPopTab(viewer));
        PetscFunctionReturn(0);
      }
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n", rank, sf->nroots, sf->nleaves, sf->nranks));
      /* one line per leaf: local location <- (remote rank, remote index) */
      for (i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerGetFormat(viewer, &format));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks, *perm;
        /* sort a copy of the rank list so output is in ascending rank order */
        PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
        PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
        for (i = 0; i < sf->nranks; i++) perm[i] = i;
        PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
        for (ii = 0; ii < sf->nranks; ii++) {
          i = perm[ii];
          PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
          for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
        }
        PetscCall(PetscFree2(tmpranks, perm));
      }
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
    }
    PetscCall(PetscViewerASCIIPopTab(viewer));
  }
  PetscTryTypeMethod(sf, View, viewer);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
.  roffset - offset in rmine/rremote for each rank (length nranks+1)
.
rmine - concatenated array holding local indices referencing each remote rank 909 - rremote - concatenated array holding remote indices referenced for each remote rank 910 911 Level: developer 912 913 .seealso: `PetscSFGetLeafRanks()` 914 @*/ 915 PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote) 916 { 917 PetscFunctionBegin; 918 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 919 PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 920 if (sf->ops->GetRootRanks) { 921 PetscCall((sf->ops->GetRootRanks)(sf, nranks, ranks, roffset, rmine, rremote)); 922 } else { 923 /* The generic implementation */ 924 if (nranks) *nranks = sf->nranks; 925 if (ranks) *ranks = sf->ranks; 926 if (roffset) *roffset = sf->roffset; 927 if (rmine) *rmine = sf->rmine; 928 if (rremote) *rremote = sf->rremote; 929 } 930 PetscFunctionReturn(0); 931 } 932 933 /*@C 934 PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 935 936 Not Collective 937 938 Input Parameter: 939 . sf - star forest 940 941 Output Parameters: 942 + niranks - number of leaf ranks referencing roots on this process 943 . iranks - array of ranks 944 . 
ioffset - offset in irootloc for each rank (length niranks+1) 945 - irootloc - concatenated array holding local indices of roots referenced by each leaf rank 946 947 Level: developer 948 949 .seealso: `PetscSFGetRootRanks()` 950 @*/ 951 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc) 952 { 953 PetscFunctionBegin; 954 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 955 PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 956 if (sf->ops->GetLeafRanks) { 957 PetscCall((sf->ops->GetLeafRanks)(sf, niranks, iranks, ioffset, irootloc)); 958 } else { 959 PetscSFType type; 960 PetscCall(PetscSFGetType(sf, &type)); 961 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 962 } 963 PetscFunctionReturn(0); 964 } 965 966 static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list) 967 { 968 PetscInt i; 969 for (i = 0; i < n; i++) { 970 if (needle == list[i]) return PETSC_TRUE; 971 } 972 return PETSC_FALSE; 973 } 974 975 /*@C 976 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations. 
   Collective

   Input Parameters:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: `PetscSFGetRootRanks()`
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
{
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size, groupsize, *groupranks;
  PetscInt          *rcount, *ranks;
  PetscInt           i, irank = -1, orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
  /* Count, per remote rank, how many of my leaves reference it */
  PetscCall(PetscTableCreate(10, size, &table));
  for (i = 0; i < sf->nleaves; i++) {
    /* Log 1-based rank (PetscTable cannot store key 0) */
    PetscCall(PetscTableAdd(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
  }
  PetscCall(PetscTableGetCount(table, &sf->nranks));
  PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
  PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
  PetscCall(PetscTableGetHeadPosition(table, &pos));
  for (i = 0; i < sf->nranks; i++) {
    PetscCall(PetscTableGetNext(table, &pos, &ranks[i], &rcount[i]));
    ranks[i]--; /* Convert back to 0-based */
  }
  PetscCall(PetscTableDestroy(&table));

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group    group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate every member of dgroup into this communicator's rank numbering */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
    PetscCall(PetscMalloc1(groupsize, &dgroupranks));
    PetscCall(PetscMalloc1(groupsize, &groupranks));
    for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
    if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree(dgroupranks));
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished,
     using a two-pointer swap scan; counts in rcount[] travel with their ranks */
  for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
    for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i], groupsize, groupranks)) break;
    }
    for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank, tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  PetscCall(PetscFree(groupranks));
  /* Sort each partition separately so lookups below can binary-search within a partition */
  PetscCall(PetscSortIntWithArray(sf->ndranks, ranks, rcount));
  PetscCall(PetscSortIntWithArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
  sf->roffset[0] = 0;
  for (i = 0; i < sf->nranks; i++) {
    PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
    sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
    rcount[i]          = 0; /* reused below as a per-rank fill cursor */
  }
  for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
    /* short circuit: leaves often arrive grouped by rank, so reuse the last lookup */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks (distinguished partition first) */
      PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->ndranks, sf->ranks, &irank));
      if (irank < 0) {
        PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %" PetscInt_FMT " in array", sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  PetscCall(PetscFree2(rcount, ranks));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
{
  MPI_Group group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) { /* both groups are computed lazily and cached on the SF */
    PetscInt        i;
    const PetscInt *indegree;
    PetscMPIInt     rank, *outranks, *inranks;
    PetscSFNode    *remote;
    PetscSF         bgcount;

    /* Compute the number of incoming ranks: build an auxiliary SF with one leaf per referenced
       rank, all pointing at a single local root; its indegree is the incoming-rank count */
    PetscCall(PetscMalloc1(sf->nranks, &remote));
    for (i = 0; i < sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
    PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
    PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
    PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
    /* Enumerate the incoming ranks: every leaf sends its own rank to the root */
    PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
    for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
    PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
    PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_incl(group, indegree[0], inranks, &sf->ingroup));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree2(inranks, outranks));
    PetscCall(PetscSFDestroy(&bgcount));
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* Outgoing group is simply the ranks my leaves reference, already known locally */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
    PetscCallMPI(MPI_Group_free(&group));
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Parameter:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Parameter:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.

.seealso: `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidPointer(multi, 2);
  if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
    *multi           = sf->multi;
    sf->multi->multi = sf->multi; /* multi is its own multi-SF */
    PetscFunctionReturn(0);
  }
  if (!sf->multi) { /* computed lazily and cached */
    const PetscInt *indegree;
    PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
    PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
    PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
    /* inoffset[] is the prefix sum of root degrees: each root's slice in the split root space */
    PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
    inoffset[0] = 0;
    for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
    for (i = 0; i < maxlocal; i++) outones[i] = 1;
    /* Each leaf atomically claims the next free slot of its root's slice */
    PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
    PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
    for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
      for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
    }
    PetscCall(PetscMalloc1(sf->nleaves, &remote));
    for (i = 0; i < sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
    sf->multi->multi = sf->multi;
    PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
    if (sf->rankorder) { /* Sort the ranks */
      PetscMPIInt  rank;
      PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
      PetscSFNode *newremote;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
      for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
      PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
      for (i = 0; i < maxlocal; i++) outranks[i] = rank;
      PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
      PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i = 0; i < sf->nroots; i++) {
        PetscInt j;
        for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
        PetscCall(PetscSortIntWithArray(indegree[i], inranks + inoffset[i], tmpoffset));
        for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell every leaf its new (rank-ordered) slot, then rebuild the multi graph with it */
      PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
      PetscCall(PetscMalloc1(sf->nleaves, &newremote));
      for (i = 0; i < sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
      PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
    }
    PetscCall(PetscFree3(inoffset, outones, outoffset));
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices

   Collective

   Input Parameters:
+  sf - original star forest
.  nselected - number of selected roots on this process
-  selected - indices of the selected roots on this process

   Output Parameter:
.  esf - new star forest

   Level: advanced

   Note:
   To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
   be done by calling PetscSFGetGraph().

.seealso: `PetscSFSetGraph()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf)
{
  PetscInt           i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
  const PetscInt    *ilocal;
  signed char       *rootdata, *leafdata, *leafmem;
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  MPI_Comm           comm;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  if (nselected) PetscValidIntPointer(selected, 3);
  PetscValidPointer(esf, 4);

  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));

  if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
    PetscBool dups;
PetscCall(PetscCheckDupsInt(nselected, selected, &dups)); 1274 PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups"); 1275 for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots); 1276 } 1277 1278 if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf); 1279 else { 1280 /* A generic version of creating embedded sf */ 1281 PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf)); 1282 maxlocal = maxleaf - minleaf + 1; 1283 PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem)); 1284 leafdata = leafmem - minleaf; 1285 /* Tag selected roots and bcast to leaves */ 1286 for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1; 1287 PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1288 PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1289 1290 /* Build esf with leaves that are still connected */ 1291 esf_nleaves = 0; 1292 for (i = 0; i < nleaves; i++) { 1293 j = ilocal ? ilocal[i] : i; 1294 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1295 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1296 */ 1297 esf_nleaves += (leafdata[j] ? 1 : 0); 1298 } 1299 PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal)); 1300 PetscCall(PetscMalloc1(esf_nleaves, &new_iremote)); 1301 for (i = n = 0; i < nleaves; i++) { 1302 j = ilocal ? 
ilocal[i] : i; 1303 if (leafdata[j]) { 1304 new_ilocal[n] = j; 1305 new_iremote[n].rank = iremote[i].rank; 1306 new_iremote[n].index = iremote[i].index; 1307 ++n; 1308 } 1309 } 1310 PetscCall(PetscSFCreate(comm, esf)); 1311 PetscCall(PetscSFSetFromOptions(*esf)); 1312 PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1313 PetscCall(PetscFree2(rootdata, leafmem)); 1314 } 1315 PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0)); 1316 PetscFunctionReturn(0); 1317 } 1318 1319 /*@C 1320 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1321 1322 Collective 1323 1324 Input Parameters: 1325 + sf - original star forest 1326 . nselected - number of selected leaves on this process 1327 - selected - indices of the selected leaves on this process 1328 1329 Output Parameter: 1330 . newsf - new star forest 1331 1332 Level: advanced 1333 1334 .seealso: `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1335 @*/ 1336 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf) 1337 { 1338 const PetscSFNode *iremote; 1339 PetscSFNode *new_iremote; 1340 const PetscInt *ilocal; 1341 PetscInt i, nroots, *leaves, *new_ilocal; 1342 MPI_Comm comm; 1343 1344 PetscFunctionBegin; 1345 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1346 PetscSFCheckGraphSet(sf, 1); 1347 if (nselected) PetscValidIntPointer(selected, 3); 1348 PetscValidPointer(newsf, 4); 1349 1350 /* Uniq selected[] and put results in leaves[] */ 1351 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1352 PetscCall(PetscMalloc1(nselected, &leaves)); 1353 PetscCall(PetscArraycpy(leaves, selected, nselected)); 1354 PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves)); 1355 PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" 
PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves); 1356 1357 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1358 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf); 1359 else { 1360 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote)); 1361 PetscCall(PetscMalloc1(nselected, &new_ilocal)); 1362 PetscCall(PetscMalloc1(nselected, &new_iremote)); 1363 for (i = 0; i < nselected; ++i) { 1364 const PetscInt l = leaves[i]; 1365 new_ilocal[i] = ilocal ? ilocal[l] : l; 1366 new_iremote[i].rank = iremote[l].rank; 1367 new_iremote[i].index = iremote[l].index; 1368 } 1369 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf)); 1370 PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1371 } 1372 PetscCall(PetscFree(leaves)); 1373 PetscFunctionReturn(0); 1374 } 1375 1376 /*@C 1377 PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd() 1378 1379 Collective on PetscSF 1380 1381 Input Parameters: 1382 + sf - star forest on which to communicate 1383 . unit - data type associated with each node 1384 . rootdata - buffer to broadcast 1385 - op - operation to use for reduction 1386 1387 Output Parameter: 1388 . leafdata - buffer to be reduced with values from each leaf's respective root 1389 1390 Level: intermediate 1391 1392 Notes: 1393 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1394 are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1395 use PetscSFBcastWithMemTypeBegin() instead. 
1396 .seealso: `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()` 1397 @*/ 1398 PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1399 { 1400 PetscMemType rootmtype, leafmtype; 1401 1402 PetscFunctionBegin; 1403 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1404 PetscCall(PetscSFSetUp(sf)); 1405 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1406 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1407 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1408 PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 1409 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1410 PetscFunctionReturn(0); 1411 } 1412 1413 /*@C 1414 PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd() 1415 1416 Collective on PetscSF 1417 1418 Input Parameters: 1419 + sf - star forest on which to communicate 1420 . unit - data type associated with each node 1421 . rootmtype - memory type of rootdata 1422 . rootdata - buffer to broadcast 1423 . leafmtype - memory type of leafdata 1424 - op - operation to use for reduction 1425 1426 Output Parameter: 1427 . 
leafdata - buffer to be reduced with values from each leaf's respective root 1428 1429 Level: intermediate 1430 1431 .seealso: `PetscSFBcastEnd()`, `PetscSFBcastBegin()` 1432 @*/ 1433 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) 1434 { 1435 PetscFunctionBegin; 1436 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1437 PetscCall(PetscSFSetUp(sf)); 1438 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1439 PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 1440 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1441 PetscFunctionReturn(0); 1442 } 1443 1444 /*@C 1445 PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin() 1446 1447 Collective 1448 1449 Input Parameters: 1450 + sf - star forest 1451 . unit - data type 1452 . rootdata - buffer to broadcast 1453 - op - operation to use for reduction 1454 1455 Output Parameter: 1456 . leafdata - buffer to be reduced with values from each leaf's respective root 1457 1458 Level: intermediate 1459 1460 .seealso: `PetscSFSetGraph()`, `PetscSFReduceEnd()` 1461 @*/ 1462 PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1463 { 1464 PetscFunctionBegin; 1465 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1466 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1467 PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op); 1468 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1469 PetscFunctionReturn(0); 1470 } 1471 1472 /*@C 1473 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 1474 1475 Collective 1476 1477 Input Parameters: 1478 + sf - star forest 1479 . 
unit - data type 1480 . leafdata - values to reduce 1481 - op - reduction operation 1482 1483 Output Parameter: 1484 . rootdata - result of reduction of values from all leaves of each root 1485 1486 Level: intermediate 1487 1488 Notes: 1489 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1490 are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1491 use PetscSFReduceWithMemTypeBegin() instead. 1492 1493 .seealso: `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()` 1494 @*/ 1495 PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1496 { 1497 PetscMemType rootmtype, leafmtype; 1498 1499 PetscFunctionBegin; 1500 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1501 PetscCall(PetscSFSetUp(sf)); 1502 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1503 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1504 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1505 PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1506 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1507 PetscFunctionReturn(0); 1508 } 1509 1510 /*@C 1511 PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd() 1512 1513 Collective 1514 1515 Input Parameters: 1516 + sf - star forest 1517 . unit - data type 1518 . leafmtype - memory type of leafdata 1519 . leafdata - values to reduce 1520 . rootmtype - memory type of rootdata 1521 - op - reduction operation 1522 1523 Output Parameter: 1524 . 
rootdata - result of reduction of values from all leaves of each root 1525 1526 Level: intermediate 1527 1528 .seealso: `PetscSFBcastBegin()`, `PetscSFReduceBegin()` 1529 @*/ 1530 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) 1531 { 1532 PetscFunctionBegin; 1533 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1534 PetscCall(PetscSFSetUp(sf)); 1535 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1536 PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1537 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1538 PetscFunctionReturn(0); 1539 } 1540 1541 /*@C 1542 PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin() 1543 1544 Collective 1545 1546 Input Parameters: 1547 + sf - star forest 1548 . unit - data type 1549 . leafdata - values to reduce 1550 - op - reduction operation 1551 1552 Output Parameter: 1553 . 
rootdata - result of reduction of values from all leaves of each root 1554 1555 Level: intermediate 1556 1557 .seealso: `PetscSFSetGraph()`, `PetscSFBcastEnd()` 1558 @*/ 1559 PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1560 { 1561 PetscFunctionBegin; 1562 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1563 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1564 PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op); 1565 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1566 PetscFunctionReturn(0); 1567 } 1568 1569 /*@C 1570 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd() 1571 1572 Collective 1573 1574 Input Parameters: 1575 + sf - star forest 1576 . unit - data type 1577 . leafdata - leaf values to use in reduction 1578 - op - operation to use for reduction 1579 1580 Output Parameters: 1581 + rootdata - root values to be updated, input state is seen by first process to perform an update 1582 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1583 1584 Level: advanced 1585 1586 Note: 1587 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1588 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1589 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1590 integers. 
1591 1592 .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()` 1593 @*/ 1594 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1595 { 1596 PetscMemType rootmtype, leafmtype, leafupdatemtype; 1597 1598 PetscFunctionBegin; 1599 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1600 PetscCall(PetscSFSetUp(sf)); 1601 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1602 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1603 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1604 PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype)); 1605 PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1606 PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 1607 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1608 PetscFunctionReturn(0); 1609 } 1610 1611 /*@C 1612 PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd() 1613 1614 Collective 1615 1616 Input Parameters: 1617 + sf - star forest 1618 . unit - data type 1619 . rootmtype - memory type of rootdata 1620 . leafmtype - memory type of leafdata 1621 . leafdata - leaf values to use in reduction 1622 . leafupdatemtype - memory type of leafupdate 1623 - op - operation to use for reduction 1624 1625 Output Parameters: 1626 + rootdata - root values to be updated, input state is seen by first process to perform an update 1627 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1628 1629 Level: advanced 1630 1631 Note: See PetscSFFetchAndOpBegin() for more details. 

.seealso: `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  /* Unlike PetscSFFetchAndOpBegin(), the caller-supplied memory types are trusted as-is;
     only the leafdata/leafupdate agreement is verified */
  PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
  PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
  /* Completes the communication started by PetscSFFetchAndOpBegin()/PetscSFFetchAndOpWithMemTypeBegin() */
  PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: `PetscSFGatherBegin()`
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt **degree)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscValidPointer(degree, 2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf - sf->minleaf + 1; /* size of the local leaf index range */
    PetscCall(PetscMalloc1(nroots, &sf->degree));
    PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i = 0; i < nroots; i++) sf->degree[i] = 0;
    for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
    /* Reduce a vector of ones from all leaves: each root accumulates its number of leaves.
       The pointer is shifted by -minleaf so that leaf index l addresses degreetmp[l - minleaf]. */
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
  }
  /* Degrees are not available until PetscSFComputeDegreeEnd() completes the reduction */
  *degree = NULL;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: `PetscSFComputeDegreeBegin()`
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscValidPointer(degree, 2);
  if (!sf->degreeknown) {
    /* degreetmp is allocated (with at least one entry) by PetscSFComputeDegreeBegin() */
    PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    /* Complete the leaves-to-roots sum of ones started in PetscSFComputeDegreeBegin() */
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
    PetscCall(PetscFree(sf->degreetmp));
    sf->degreeknown = PETSC_TRUE; /* cache the result; subsequent calls return it directly */
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
   Each multi-root is assigned index of the corresponding original root.

   Collective

   Input Parameters:
+  sf - star forest
-  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

   Output Parameters:
+  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
-  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

   Level: developer

   Notes:
   The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.

.seealso: `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
@*/
PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
{
  PetscSF  msf;
  PetscInt i, j, k, nroots, nmroots;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  if (nroots) PetscValidIntPointer(degree, 2); /* degree may be NULL when there are no local roots */
  if (nMultiRoots) PetscValidIntPointer(nMultiRoots, 3);
  PetscValidPointer(multiRootsOrigNumbering, 4);
  PetscCall(PetscSFGetMultiSF(sf, &msf));
  PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
  PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
  /* The multi-SF has one root per (root, leaf) edge; emit each original root index degree[i] times */
  for (i = 0, j = 0, k = 0; i < nroots; i++) {
    if (!degree[i]) continue; /* roots with no leaves contribute no multi-root */
    for (j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
  }
  PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail");
  if (nMultiRoots) *nMultiRoots = nmroots;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Parameter:
.
multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
@*/
PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  /* A gather is a reduce over the multi-SF (one multi-root per (root, leaf) edge);
     with MPI_REPLACE each leaf's value lands in its own slot, so nothing is combined */
  PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Parameter:
.  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
@*/
PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  /* Complete the multi-SF reduce started in PetscSFGatherBegin() */
  PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Parameter:
.
leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  /* A scatter is a broadcast over the multi-SF: each leaf receives its own (personal) slot of multirootdata */
  PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Parameter:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  /* Complete the multi-SF broadcast started in PetscSFScatterBegin() */
  PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
  PetscFunctionReturn(0);
}

/* Debug-build-only sanity check that no leaf index appears more than once in the SF graph;
   a no-op in optimized builds */
static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
{
  PetscInt        i, n, nleaves;
  const PetscInt *ilocal = NULL;
  PetscHSetI      seen;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) {
    PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
    PetscCall(PetscHSetICreate(&seen));
    for (i = 0; i < nleaves; i++) {
      const PetscInt leaf = ilocal ? ilocal[i] : i; /* ilocal == NULL means contiguous leaves 0..nleaves-1 */
      PetscCall(PetscHSetIAdd(seen, leaf));
    }
    /* If any leaf repeated, the set is smaller than the leaf count */
    PetscCall(PetscHSetIGetSize(seen, &n));
    PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
    PetscCall(PetscHSetIDestroy(&seen));
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

   Input Parameters:
+  sfA - The first PetscSF
-  sfB - The second PetscSF

   Output Parameters:
.  sfBA - The composite SF

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e. the same leaf is not connected with different roots.

   sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
   Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA, *remotePointsB;
  PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
  const PetscInt    *localPointsA, *localPointsB;
  PetscInt          *localPointsBA;
  PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
  PetscBool          denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sfA, 1);
  PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
  PetscSFCheckGraphSet(sfB, 2);
  PetscCheckSameComm(sfA, 1, sfB, 2);
  PetscValidPointer(sfBA, 3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
  /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size */
  /* numRootsB; otherwise, garbage will be broadcasted. */
  /* Example (comm size = 1): */
  /* sfA: 0 <- (0, 0) */
  /* sfB: 100 <- (0, 0) */
  /*      101 <- (0, 1) */
  /* Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target */
  /* of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would */
  /* receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on */
  /* remotePointsA; if not recast, point 101 would receive a garbage value. */
  PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
  for (i = 0; i < numRootsB; i++) {
    reorderedRemotePointsA[i].rank  = -1; /* (-1, -1) marks "no sfA data for this sfB root" */
    reorderedRemotePointsA[i].index = -1;
  }
  /* Scatter sfA's remote points into sfB's root index space */
  for (i = 0; i < numLeavesA; i++) {
    PetscInt localp = localPointsA ? localPointsA[i] : i;

    if (localp >= numRootsB) continue; /* this sfA leaf is not an sfB root, so it is not on any composed path */
    reorderedRemotePointsA[localp] = remotePointsA[i];
  }
  remotePointsA = reorderedRemotePointsA;
  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
  /* Initialize to (-1, -1) so leaves that receive nothing can be recognized and dropped below */
  for (i = 0; i < maxleaf - minleaf + 1; i++) {
    leafdataB[i].rank  = -1;
    leafdataB[i].index = -1;
  }
  /* Push each sfA root's (rank, index) through sfB down to sfB's leaves;
     leafdataB is shifted by -minleaf so leaf index l addresses leafdataB[l - minleaf] */
  PetscCall(PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE));
  PetscCall(PetscFree(reorderedRemotePointsA));

  /* denseB: every sfB leaf is connected and leaves are contiguous (ilocal == NULL) */
  denseB = (PetscBool)!localPointsB;
  for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    /* All leaves connected with contiguous storage: hand leafdataB over to the new SF directly */
    localPointsBA  = NULL;
    remotePointsBA = leafdataB;
  } else {
    /* Compact: keep only the connected leaves */
    PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
    PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
    for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l - minleaf].rank == -1) continue; /* unconnected leaf: drop */
      remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
      localPointsBA[numLeavesBA]  = l;
      numLeavesBA++;
    }
    PetscCall(PetscFree(leafdataB));
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  /* Ownership of localPointsBA/remotePointsBA transfers to the new SF */
  PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
  PetscFunctionReturn(0);
}

/*@
   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

   Input Parameters:
+  sfA - The first PetscSF
-  sfB - The second PetscSF

   Output Parameters:
.  sfBA - The composite SF.

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
   a Reduce on sfB, on connected roots.

.seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA, *remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA, *localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
  MPI_Op             op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sfA, 1);
  PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
  PetscSFCheckGraphSet(sfB, 2);
  PetscCheckSameComm(sfA, 1, sfB, 2);
  PetscValidPointer(sfBA, 3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
  */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
  if (iswin) op = MPI_REPLACE;
#endif

  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
  /* Lay out sfA's remote points by sfB's leaf index; (-1, -1) marks sfB leaves with no sfA data */
  for (i = 0; i < maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank  = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i = 0; i < numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; /* outside sfB's leaf range: no path */
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* Contiguous sfA leaves 0..numLeavesA-1 */
    for (i = 0; i < numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
  PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
  for (i = 0; i < numRootsB; i++) {
    remotePointsBA[i].rank  = -1; /* (-1, -1) marks sfB roots that receive nothing */
    remotePointsBA[i].index = -1;
  }

  /* Reduce sfA's remote points from sfB's leaves up to sfB's roots, i.e. follow sfB in reverse */
  PetscCall(PetscSFReduceBegin(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op));
  PetscCall(PetscSFReduceEnd(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op));
  PetscCall(PetscFree(reorderedRemotePointsA));
  /* Compact in place: keep only sfB roots that received a valid (rank, index) pair */
  for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA]        = i;
    numLeavesBA++;
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
  PetscFunctionReturn(0);
}

/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
. out - The local PetscSF
*/
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
{
  MPI_Comm           comm;
  PetscMPIInt        myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
  PetscSFNode       *liremote;
  PetscSF            lsf;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* Prefer the implementation-specific method when the SF type provides one */
  if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
  else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
    PetscCallMPI(MPI_Comm_rank(comm, &myrank));

    /* Find out local edges and build a local SF */
    PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
    /* First pass: count leaves whose root lives on this rank */
    for (i = lnleaves = 0; i < nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) lnleaves++;
    }
    PetscCall(PetscMalloc1(lnleaves, &lilocal));
    PetscCall(PetscMalloc1(lnleaves, &liremote));

    /* Second pass: copy those edges, remapping the root rank to 0 (self) */
    for (i = j = 0; i < nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
    PetscCall(PetscSFSetFromOptions(lsf));
    PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(lsf));
    *out = lsf;
  }
  PetscFunctionReturn(0);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
{
  PetscMemType rootmtype, leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  /* Logged under the generic BcastBegin event; this private routine has no event of its own */
  PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscCall(PetscGetMemType(rootdata, &rootmtype));
  PetscCall(PetscGetMemType(leafdata, &leafmtype));
  PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
  PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@
   PetscSFConcatenate - concatenate multiple SFs into one

   Input Parameters:
+  comm - the communicator
.  nsfs - the number of input PetscSF
.  sfs - the array of input PetscSF
.  shareRoots - the flag whether roots of input PetscSFs are taken as shared (PETSC_TRUE), or separate and concatenated (PETSC_FALSE)
-  leafOffsets - the array of local leaf offsets, one for each input PetscSF, or NULL for contiguous storage

   Output Parameters:
.  newsf - The resulting PetscSF

   Level: developer

   Notes:
   The communicator of all SFs in sfs must be comm.

   The offsets in leafOffsets are added to the original leaf indices.

   If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well.
   In this case, NULL is also passed as ilocal to the resulting SF.

   If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs.
   In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs).

.seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscBool shareRoots, PetscInt leafOffsets[], PetscSF *newsf)
{
  PetscInt     i, s, nLeaves, nRoots;
  PetscInt    *leafArrayOffsets; /* prefix sums of the input SFs' leaf counts */
  PetscInt    *ilocal_new;
  PetscSFNode *iremote_new;
  PetscInt    *rootOffsets; /* prefix sums of root counts; all zero when shareRoots */
  PetscBool    all_ilocal_null = PETSC_FALSE;
  PetscMPIInt  rank;

  PetscFunctionBegin;
  {
    PetscSF dummy; /* just to have a PetscObject on comm for input validation */

    PetscCall(PetscSFCreate(comm, &dummy));
    PetscValidLogicalCollectiveInt(dummy, nsfs, 2);
    PetscValidPointer(sfs, 3);
    for (i = 0; i < nsfs; i++) {
      PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3);
      PetscCheckSameComm(dummy, 1, sfs[i], 3);
    }
    PetscValidLogicalCollectiveBool(dummy, shareRoots, 4);
    if (leafOffsets) PetscValidIntPointer(leafOffsets, 5);
    PetscValidPointer(newsf, 6);
    PetscCall(PetscSFDestroy(&dummy));
  }
  /* No input SFs: return an empty SF */
  if (!nsfs) {
    PetscCall(PetscSFCreate(comm, newsf));
    PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscFunctionReturn(0);
  }
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  PetscCall(PetscCalloc1(nsfs + 1, &rootOffsets));
  if (shareRoots) {
    /* All inputs address the same root space; in debug builds verify the sizes actually agree */
    PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
    if (PetscDefined(USE_DEBUG)) {
      for (s = 1; s < nsfs; s++) {
        PetscInt nr;

        PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
        PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "shareRoots = PETSC_TRUE but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", s, nr, nRoots);
      }
    }
  } else {
    /* Roots are stacked; record each SF's starting offset in the concatenated root space */
    for (s = 0; s < nsfs; s++) {
      PetscInt nr;

      PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
      rootOffsets[s + 1] = rootOffsets[s] + nr;
    }
    nRoots = rootOffsets[nsfs];
  }

  /* Calculate leaf array offsets and automatic root offsets */
  PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
  leafArrayOffsets[0] = 0;
  for (s = 0; s < nsfs; s++) {
    PetscInt nl;

    PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
    leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
  }
  nLeaves = leafArrayOffsets[nsfs];

  /* leafOffsets == NULL is only legal when every input SF has contiguous leaves */
  if (!leafOffsets) {
    all_ilocal_null = PETSC_TRUE;
    for (s = 0; s < nsfs; s++) {
      const PetscInt *ilocal;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
      if (ilocal) {
        all_ilocal_null = PETSC_FALSE;
        break;
      }
    }
    PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
  }

  /* Renumber and concatenate local leaves */
  ilocal_new = NULL;
  if (!all_ilocal_null) {
    /* leafOffsets must be non-NULL here, otherwise the check above has already errored out */
    PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
    for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
    for (s = 0; s < nsfs; s++) {
      const PetscInt *ilocal;
      PetscInt       *ilocal_l = &ilocal_new[leafArrayOffsets[s]];
      PetscInt        i, nleaves_l;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
      for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
    }
  }

  /* Renumber and concatenate remote roots */
  PetscCall(PetscMalloc1(nLeaves, &iremote_new));
  for (i = 0; i < nLeaves; i++) {
    iremote_new[i].rank  = -1;
    iremote_new[i].index = -1;
  }
  for (s = 0; s < nsfs; s++) {
    PetscInt           i, nl, nr;
    PetscSF            tmp_sf;
    const PetscSFNode *iremote;
    PetscSFNode       *tmp_rootdata;
    PetscSFNode       *tmp_leafdata = &iremote_new[leafArrayOffsets[s]]; /* this SF's slice of the new iremote */

    PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
    PetscCall(PetscSFCreate(comm, &tmp_sf));
    /* create helper SF with contiguous leaves */
    PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
    PetscCall(PetscSFSetUp(tmp_sf));
    PetscCall(PetscMalloc1(nr, &tmp_rootdata));
    /* Each root broadcasts its new (rank, concatenated index) to its leaves */
    for (i = 0; i < nr; i++) {
      tmp_rootdata[i].index = i + rootOffsets[s];
      tmp_rootdata[i].rank  = (PetscInt)rank;
    }
    PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
    PetscCall(PetscSFDestroy(&tmp_sf));
    PetscCall(PetscFree(tmp_rootdata));
  }

  /* Build the new SF */
  PetscCall(PetscSFCreate(comm, newsf));
  PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetUp(*newsf));
  PetscCall(PetscFree(rootOffsets));
  PetscCall(PetscFree(leafArrayOffsets));
  PetscFunctionReturn(0);
}