1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/hashseti.h> 3 #include <petsc/private/viewerimpl.h> 4 #include <petscctable.h> 5 6 #if defined(PETSC_HAVE_CUDA) 7 #include <cuda_runtime.h> 8 #endif 9 10 #if defined(PETSC_HAVE_HIP) 11 #include <hip/hip_runtime.h> 12 #endif 13 14 #if defined(PETSC_CLANG_STATIC_ANALYZER) 15 void PetscSFCheckGraphSet(PetscSF, int); 16 #else 17 #if defined(PETSC_USE_DEBUG) 18 #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME); 19 #else 20 #define PetscSFCheckGraphSet(sf, arg) \ 21 do { \ 22 } while (0) 23 #endif 24 #endif 25 26 const char *const PetscSFDuplicateOptions[] = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL}; 27 28 /*@ 29 PetscSFCreate - create a star forest communication context 30 31 Collective 32 33 Input Parameter: 34 . comm - communicator on which the star forest will operate 35 36 Output Parameter: 37 . sf - new star forest context 38 39 Options Database Keys: 40 + -sf_type basic -Use MPI persistent Isend/Irecv for communication (Default) 41 . -sf_type window -Use MPI-3 one-sided window for communication 42 - -sf_type neighbor -Use MPI-3 neighborhood collectives for communication 43 44 Level: intermediate 45 46 Notes: 47 When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv, 48 MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special 49 SFs are optimized and they have better performance than general SFs. 

.seealso: `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf) {
  PetscSF b;

  PetscFunctionBegin;
  PetscValidPointer(sf, 2);
  PetscCall(PetscSFInitializePackage());

  PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));

  /* Mark the graph as not-yet-set: negative counts and an inverted [minleaf, maxleaf] range */
  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_MAX_INT;
  b->maxleaf   = PETSC_MIN_INT;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;
#if defined(PETSC_HAVE_DEVICE)
  /* use_gpu_aware_mpi is the global PETSc flag; the per-object field may later be changed via options */
  b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
  b->use_stream_aware_mpi = PETSC_FALSE;
  b->unknown_input_stream = PETSC_FALSE;
#if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
  b->backend = PETSCSF_BACKEND_KOKKOS;
#elif defined(PETSC_HAVE_CUDA)
  b->backend = PETSCSF_BACKEND_CUDA;
#elif defined(PETSC_HAVE_HIP)
  b->backend = PETSCSF_BACKEND_HIP;
#endif

#if defined(PETSC_HAVE_NVSHMEM)
  b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
  b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
#endif
#endif
  /* Vector-scatter state: -1 sizes mean "not configured"; unit is the MPI datatype moved per leaf */
  b->vscat.from_n = -1;
  b->vscat.to_n   = -1;
  b->vscat.unit   = MPIU_SCALAR;
  *sf             = b;
  PetscFunctionReturn(0);
}

/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Parameter:
.
sf - star forest 104 105 Level: advanced 106 107 .seealso: `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()` 108 @*/ 109 PetscErrorCode PetscSFReset(PetscSF sf) { 110 PetscFunctionBegin; 111 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 112 PetscTryTypeMethod(sf, Reset); 113 sf->nroots = -1; 114 sf->nleaves = -1; 115 sf->minleaf = PETSC_MAX_INT; 116 sf->maxleaf = PETSC_MIN_INT; 117 sf->mine = NULL; 118 sf->remote = NULL; 119 sf->graphset = PETSC_FALSE; 120 PetscCall(PetscFree(sf->mine_alloc)); 121 PetscCall(PetscFree(sf->remote_alloc)); 122 sf->nranks = -1; 123 PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote)); 124 sf->degreeknown = PETSC_FALSE; 125 PetscCall(PetscFree(sf->degree)); 126 if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup)); 127 if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup)); 128 if (sf->multi) sf->multi->multi = NULL; 129 PetscCall(PetscSFDestroy(&sf->multi)); 130 PetscCall(PetscLayoutDestroy(&sf->map)); 131 132 #if defined(PETSC_HAVE_DEVICE) 133 for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i])); 134 #endif 135 136 sf->setupcalled = PETSC_FALSE; 137 PetscFunctionReturn(0); 138 } 139 140 /*@C 141 PetscSFSetType - Set the PetscSF communication implementation 142 143 Collective on PetscSF 144 145 Input Parameters: 146 + sf - the PetscSF context 147 - type - a known method 148 149 Options Database Key: 150 . 
-sf_type <type> - Sets the method; use -help for a list 151 of available methods (for instance, window, basic, neighbor) 152 153 Notes: 154 See "include/petscsf.h" for available methods (for instance) 155 + PETSCSFWINDOW - MPI-2/3 one-sided 156 - PETSCSFBASIC - basic implementation using MPI-1 two-sided 157 158 Level: intermediate 159 160 .seealso: `PetscSFType`, `PetscSFCreate()` 161 @*/ 162 PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type) { 163 PetscBool match; 164 PetscErrorCode (*r)(PetscSF); 165 166 PetscFunctionBegin; 167 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 168 PetscValidCharPointer(type, 2); 169 170 PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match)); 171 if (match) PetscFunctionReturn(0); 172 173 PetscCall(PetscFunctionListFind(PetscSFList, type, &r)); 174 PetscCheck(r, PETSC_COMM_SELF, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type); 175 /* Destroy the previous PetscSF implementation context */ 176 PetscTryTypeMethod(sf, Destroy); 177 PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops))); 178 PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type)); 179 PetscCall((*r)(sf)); 180 PetscFunctionReturn(0); 181 } 182 183 /*@C 184 PetscSFGetType - Get the PetscSF communication implementation 185 186 Not Collective 187 188 Input Parameter: 189 . sf - the PetscSF context 190 191 Output Parameter: 192 . type - the PetscSF type name 193 194 Level: intermediate 195 196 .seealso: `PetscSFSetType()`, `PetscSFCreate()` 197 @*/ 198 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) { 199 PetscFunctionBegin; 200 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 201 PetscValidPointer(type, 2); 202 *type = ((PetscObject)sf)->type_name; 203 PetscFunctionReturn(0); 204 } 205 206 /*@C 207 PetscSFDestroy - destroy star forest 208 209 Collective 210 211 Input Parameter: 212 . 
sf - address of star forest

   Level: intermediate

.seealso: `PetscSFCreate()`, `PetscSFReset()`
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf) {
  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf), PETSCSF_CLASSID, 1);
  /* Drop one reference; only the last reference actually frees the object */
  if (--((PetscObject)(*sf))->refct > 0) {
    *sf = NULL;
    PetscFunctionReturn(0);
  }
  PetscCall(PetscSFReset(*sf));
  PetscTryTypeMethod((*sf), Destroy);
  PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
  /* vscat.unit is a derived MPI datatype only in the blocked (bs > 1) case, so only then is it freed */
  if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
  PetscCall(PetscHeaderDestroy(sf)); /* also sets *sf = NULL */
  PetscFunctionReturn(0);
}

/* Debug-only sanity check of a set graph: every remote rank must be a valid communicator rank and
   every remote/local index must be nonnegative. No-op in optimized builds or when no graph is set. */
static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) {
  PetscInt           i, nleaves;
  PetscMPIInt        size;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;

  PetscFunctionBegin;
  if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
  PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
  for (i = 0; i < nleaves; i++) {
    const PetscInt rank   = iremote[i].rank;
    const PetscInt remote = iremote[i].index;
    const PetscInt leaf   = ilocal ? ilocal[i] : i; /* NULL ilocal means contiguous leaves */
    PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
    PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
    PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetUp - set up communication structures

   Collective

   Input Parameter:
.
sf - star forest communication object 262 263 Level: beginner 264 265 .seealso: `PetscSFSetFromOptions()`, `PetscSFSetType()` 266 @*/ 267 PetscErrorCode PetscSFSetUp(PetscSF sf) { 268 PetscFunctionBegin; 269 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 270 PetscSFCheckGraphSet(sf, 1); 271 if (sf->setupcalled) PetscFunctionReturn(0); 272 PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0)); 273 PetscCall(PetscSFCheckGraphValid_Private(sf)); 274 if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */ 275 PetscTryTypeMethod(sf, SetUp); 276 #if defined(PETSC_HAVE_CUDA) 277 if (sf->backend == PETSCSF_BACKEND_CUDA) { 278 sf->ops->Malloc = PetscSFMalloc_CUDA; 279 sf->ops->Free = PetscSFFree_CUDA; 280 } 281 #endif 282 #if defined(PETSC_HAVE_HIP) 283 if (sf->backend == PETSCSF_BACKEND_HIP) { 284 sf->ops->Malloc = PetscSFMalloc_HIP; 285 sf->ops->Free = PetscSFFree_HIP; 286 } 287 #endif 288 289 # 290 #if defined(PETSC_HAVE_KOKKOS) 291 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 292 sf->ops->Malloc = PetscSFMalloc_Kokkos; 293 sf->ops->Free = PetscSFFree_Kokkos; 294 } 295 #endif 296 PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0)); 297 sf->setupcalled = PETSC_TRUE; 298 PetscFunctionReturn(0); 299 } 300 301 /*@ 302 PetscSFSetFromOptions - set PetscSF options using the options database 303 304 Logically Collective 305 306 Input Parameter: 307 . sf - star forest 308 309 Options Database Keys: 310 + -sf_type - implementation type, see PetscSFSetType() 311 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 312 . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also 313 use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true). 314 If true, this option only works with -use_gpu_aware_mpi 1. 315 . 
-sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
                            If true, this option only works with -use_gpu_aware_mpi 1.

-  -sf_backend cuda | hip | kokkos - Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos.
                            On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
                            the only available is kokkos.

   Level: intermediate
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf) {
  PetscSFType deft;
  char        type[256];
  PetscBool   flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscObjectOptionsBegin((PetscObject)sf);
  /* Default to the current type if one is set, otherwise basic */
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
  PetscCall(PetscSFSetType(sf, flg ? type : deft));
  PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
#if defined(PETSC_HAVE_DEVICE)
  {
    char      backendstr[32] = {0};
    PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
    /* Change the defaults set in PetscSFCreate() with command line options */
    PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
    PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
    PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
    PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
    PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
    PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
    if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
    else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
    else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
    else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
#elif defined(PETSC_HAVE_KOKKOS)
    /* Kokkos is the only device backend available in this configuration */
    PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
#endif
  }
#endif
  PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
  PetscOptionsEnd();
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

   Logically Collective

   Input Parameters:
+  sf - star forest
-  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

   Level: advanced

.seealso: `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveBool(sf, flg, 2);
  /* The multi-SF is built lazily; once it exists the ordering cannot be changed */
  PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Parameters:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
            during setup in debug mode)
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
             during setup in debug mode)
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
   Leaf indices in ilocal must be unique, otherwise an error occurs.

   Input arrays ilocal and iremote follow the PetscCopyMode semantics.
   In particular, if localmode/remotemode is PETSC_OWN_POINTER or PETSC_USE_POINTER,
   PETSc might modify the respective array;
   if PETSC_USE_POINTER, the user must delete the array after PetscSFDestroy().
   Only if PETSC_COPY_VALUES is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).

   Fortran Notes:
   In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode.

   Developer Notes:
   We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
   This also allows to compare leaf sets of two SFs easily.

.seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode) {
  PetscBool unique, contiguous;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal, 4);
  if (nleaves > 0) PetscValidPointer(iremote, 6);
  PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
  PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
  /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
   * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
  PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
  PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    PetscCall(PetscSFReset(sf));
  }

  PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  /* With PETSC_COPY_VALUES, work on private copies; afterwards ilocal/iremote point at memory this SF owns */
  if (localmode == PETSC_COPY_VALUES && ilocal) {
    PetscInt *tlocal = NULL;

    PetscCall(PetscMalloc1(nleaves, &tlocal));
    PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
    ilocal = tlocal;
  }
  if (remotemode == PETSC_COPY_VALUES) {
    PetscSFNode *tremote = NULL;

    PetscCall(PetscMalloc1(nleaves, &tremote));
    PetscCall(PetscArraycpy(tremote, iremote, nleaves));
    iremote = tremote;
  }

  if (nleaves && ilocal) {
    PetscSFNode work;

    /* Sort leaves by local index, permuting iremote in lockstep; yields minleaf/maxleaf for free */
    PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
    PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
    unique = PetscNot(unique); /* PetscSortedCheckDupsInt() reports "has dups"; we want "is unique" */
    PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
    sf->minleaf = ilocal[0];
    sf->maxleaf = ilocal[nleaves - 1];
    contiguous  = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
  } else {
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
    unique      = PETSC_TRUE;
    contiguous  = PETSC_TRUE;
  }

  /* A contiguous leaf set is canonically represented by ilocal == NULL */
  if (contiguous) {
    if (localmode == PETSC_USE_POINTER) {
      ilocal = NULL;
    } else {
      PetscCall(PetscFree(ilocal));
    }
  }
  sf->mine = ilocal;
  if (localmode == PETSC_USE_POINTER) {
    sf->mine_alloc = NULL; /* not owned; user frees after PetscSFDestroy() */
  } else {
    sf->mine_alloc = ilocal;
  }
  sf->remote = iremote;
  if (remotemode == PETSC_USE_POINTER) {
    sf->remote_alloc = NULL; /* not owned */
  } else {
    sf->remote_alloc = iremote;
  }
  PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

   Collective

   Input Parameters:
+  sf - The PetscSF
.  map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
-  pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

   Notes:
   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
   n and N are local and global sizes of x respectively.

   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
   sequential vectors y on all ranks.

   With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
   sequential vector y on rank 0.

   In above cases, entries of x are roots and entries of y are leaves.

   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
   creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
   of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
   not mean one can not send multiple items. One just needs to create a new MPI datatype for the multiple data
   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

   In this case, roots and leaves are symmetric.

   Level: intermediate
@*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern) {
  MPI_Comm    comm;
  PetscInt    n, N, res[2];
  PetscMPIInt rank, size;
  PetscSFType type;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map, 2);
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* Build an internal layout with 'size' roots per rank; user's map is ignored for alltoall */
    PetscCall(PetscLayoutCreate(comm, &sf->map));
    PetscCall(PetscLayoutSetLocalSize(sf->map, size));
    PetscCall(PetscLayoutSetSize(sf->map, ((PetscInt)size) * size));
    PetscCall(PetscLayoutSetUp(sf->map));
  } else {
    PetscCall(PetscLayoutGetLocalSize(map, &n));
    PetscCall(PetscLayoutGetSize(map, &N));
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    PetscCall(PetscLayoutReference(map, &sf->map));
  }
  PetscCall(PetscSFSetType(sf, type));

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots  = n;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves in a gather */
    sf->nleaves = rank ? 0 : N;
    sf->nroots  = n;
    sf->nranks  = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves = size;
    sf->nroots  = size;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

   Collective

   Input Parameter:
.  sf - star forest to invert

   Output Parameter:
.  isf - inverse of sf

   Level: advanced

   Notes:
   All roots must have degree 1.

   The local space may be a permutation, but cannot be sparse.

.seealso: `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf) {
  PetscMPIInt     rank;
  PetscInt        i, nroots, nleaves, maxlocal, count, *newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots, *leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscValidPointer(isf, 2);

  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
  maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
  PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
  /* Each leaf records its own (rank, index); reducing pushes that back to the root it references */
  for (i = 0; i < maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* (-1,-1) marks roots that receive nothing, i.e. roots with no attached leaf */
  for (i = 0; i < nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  PetscCall(PetscSFReduceBegin(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));

  /* Check whether our leaves are sparse */
  for (i = 0, count = 0; i < nroots; i++)
    if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL; /* dense: contiguous leaves in the inverse */
  else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    PetscCall(PetscMalloc1(count, &newilocal));
    for (i = 0, count = 0; i < nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        /* In-place compaction is safe: count <= i always holds */
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
  PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
  PetscCall(PetscFree2(roots, leaves));
  PetscFunctionReturn(0);
}

/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Parameters:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Parameter:
.  newsf - new communication object

   Level: beginner

.seealso: `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf) {
  PetscSFType  type;
  MPI_Datatype dtype = MPIU_SCALAR;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveEnum(sf, opt, 2);
  PetscValidPointer(newsf, 3);
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
  PetscCall(PetscSFGetType(sf, &type));
  if (type) PetscCall(PetscSFSetType(*newsf, type));
  (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag ealier since PetscSFSetGraph() below checks on this flag */
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf, 1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt           nroots, nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
      /* Casts drop const only because PetscSFSetGraph() takes non-const; PETSC_COPY_VALUES leaves the arrays untouched */
      PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
    } else {
      PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
    }
  }
  /* Since oldtype is committed, so is newtype, according to MPI */
  if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
  (*newsf)->vscat.bs     = sf->vscat.bs;
  (*newsf)->vscat.unit   = dtype;
  (*newsf)->vscat.to_n   = sf->vscat.to_n;
  (*newsf)->vscat.from_n = sf->vscat.from_n;
  /* Do not copy lsf. Build it on demand since it is rarely used */

#if defined(PETSC_HAVE_DEVICE)
  (*newsf)->backend              = sf->backend;
  (*newsf)->unknown_input_stream = sf->unknown_input_stream;
  (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGraph - Get the graph specifying a parallel star forest

   Not Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
-  iremote - remote locations of root vertices for each leaf on the current process

   Notes:
   We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet

   The returned ilocal/iremote might contain values in different order than the input ones in PetscSFSetGraph(),
   see its manpage for details.

   Fortran Notes:
   The returned iremote array is a copy and must be deallocated after use.
Consequently, if you 746 want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array. 747 748 To check for a NULL ilocal use 749 $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then 750 751 Level: intermediate 752 753 .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()` 754 @*/ 755 PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote) { 756 PetscFunctionBegin; 757 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 758 if (sf->ops->GetGraph) { 759 PetscCall((sf->ops->GetGraph)(sf, nroots, nleaves, ilocal, iremote)); 760 } else { 761 if (nroots) *nroots = sf->nroots; 762 if (nleaves) *nleaves = sf->nleaves; 763 if (ilocal) *ilocal = sf->mine; 764 if (iremote) *iremote = sf->remote; 765 } 766 PetscFunctionReturn(0); 767 } 768 769 /*@ 770 PetscSFGetLeafRange - Get the active leaf ranges 771 772 Not Collective 773 774 Input Parameter: 775 . sf - star forest 776 777 Output Parameters: 778 + minleaf - minimum active leaf on this process. Return 0 if there are no leaves. 779 - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves. 780 781 Level: developer 782 783 .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 784 @*/ 785 PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf) { 786 PetscFunctionBegin; 787 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 788 PetscSFCheckGraphSet(sf, 1); 789 if (minleaf) *minleaf = sf->minleaf; 790 if (maxleaf) *maxleaf = sf->maxleaf; 791 PetscFunctionReturn(0); 792 } 793 794 /*@C 795 PetscSFViewFromOptions - View from Options 796 797 Collective on PetscSF 798 799 Input Parameters: 800 + A - the star forest 801 . 
obj - Optional object
-  name - command line option

   Level: intermediate
.seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[]) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1);
  PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFView - view a star forest

   Collective

   Input Parameters:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: `PetscSFCreate()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer) {
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  PetscCheckSameComm(sf, 1, viewer, 2);
  /* Set up first so rank/offset tables exist for the detailed view below */
  if (sf->graphset) PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt    ii, i, j;

    PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
    PetscCall(PetscViewerASCIIPushTab(viewer));
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
        PetscCall(PetscViewerASCIIPopTab(viewer));
        PetscFunctionReturn(0);
      }
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n", rank, sf->nroots, sf->nleaves, sf->nranks));
      /* One line per leaf: local index <- (remote rank, remote index); NULL sf->mine means contiguous leaves */
      for (i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerGetFormat(viewer, &format));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks, *perm;
        /* Sort a copy of the rank table so output is by increasing rank; perm maps back to original slots */
        PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
        PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
        for (i = 0; i < sf->nranks; i++) perm[i] = i;
        PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
        for (ii = 0; ii < sf->nranks; ii++) {
          i = perm[ii];
          PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
          for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
        }
        PetscCall(PetscFree2(tmpranks, perm));
      }
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
    }
    PetscCall(PetscViewerASCIIPopTab(viewer));
  }
  PetscTryTypeMethod(sf, View, viewer);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
.  roffset - offset in rmine/rremote for each rank (length nranks+1)
.
rmine - concatenated array holding local indices referencing each remote rank 892 - rremote - concatenated array holding remote indices referenced for each remote rank 893 894 Level: developer 895 896 .seealso: `PetscSFGetLeafRanks()` 897 @*/ 898 PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote) { 899 PetscFunctionBegin; 900 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 901 PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 902 if (sf->ops->GetRootRanks) { 903 PetscCall((sf->ops->GetRootRanks)(sf, nranks, ranks, roffset, rmine, rremote)); 904 } else { 905 /* The generic implementation */ 906 if (nranks) *nranks = sf->nranks; 907 if (ranks) *ranks = sf->ranks; 908 if (roffset) *roffset = sf->roffset; 909 if (rmine) *rmine = sf->rmine; 910 if (rremote) *rremote = sf->rremote; 911 } 912 PetscFunctionReturn(0); 913 } 914 915 /*@C 916 PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 917 918 Not Collective 919 920 Input Parameter: 921 . sf - star forest 922 923 Output Parameters: 924 + niranks - number of leaf ranks referencing roots on this process 925 . iranks - array of ranks 926 . 
ioffset - offset in irootloc for each rank (length niranks+1) 927 - irootloc - concatenated array holding local indices of roots referenced by each leaf rank 928 929 Level: developer 930 931 .seealso: `PetscSFGetRootRanks()` 932 @*/ 933 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc) { 934 PetscFunctionBegin; 935 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 936 PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 937 if (sf->ops->GetLeafRanks) { 938 PetscCall((sf->ops->GetLeafRanks)(sf, niranks, iranks, ioffset, irootloc)); 939 } else { 940 PetscSFType type; 941 PetscCall(PetscSFGetType(sf, &type)); 942 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 943 } 944 PetscFunctionReturn(0); 945 } 946 947 static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list) { 948 PetscInt i; 949 for (i = 0; i < n; i++) { 950 if (needle == list[i]) return PETSC_TRUE; 951 } 952 return PETSC_FALSE; 953 } 954 955 /*@C 956 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations. 
957 958 Collective 959 960 Input Parameters: 961 + sf - PetscSF to set up; PetscSFSetGraph() must have been called 962 - dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange) 963 964 Level: developer 965 966 .seealso: `PetscSFGetRootRanks()` 967 @*/ 968 PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup) { 969 PetscTable table; 970 PetscTablePosition pos; 971 PetscMPIInt size, groupsize, *groupranks; 972 PetscInt *rcount, *ranks; 973 PetscInt i, irank = -1, orank = -1; 974 975 PetscFunctionBegin; 976 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 977 PetscSFCheckGraphSet(sf, 1); 978 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 979 PetscCall(PetscTableCreate(10, size, &table)); 980 for (i = 0; i < sf->nleaves; i++) { 981 /* Log 1-based rank */ 982 PetscCall(PetscTableAdd(table, sf->remote[i].rank + 1, 1, ADD_VALUES)); 983 } 984 PetscCall(PetscTableGetCount(table, &sf->nranks)); 985 PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote)); 986 PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks)); 987 PetscCall(PetscTableGetHeadPosition(table, &pos)); 988 for (i = 0; i < sf->nranks; i++) { 989 PetscCall(PetscTableGetNext(table, &pos, &ranks[i], &rcount[i])); 990 ranks[i]--; /* Convert back to 0-based */ 991 } 992 PetscCall(PetscTableDestroy(&table)); 993 994 /* We expect that dgroup is reliably "small" while nranks could be large */ 995 { 996 MPI_Group group = MPI_GROUP_NULL; 997 PetscMPIInt *dgroupranks; 998 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 999 PetscCallMPI(MPI_Group_size(dgroup, &groupsize)); 1000 PetscCall(PetscMalloc1(groupsize, &dgroupranks)); 1001 PetscCall(PetscMalloc1(groupsize, &groupranks)); 1002 for (i = 0; i < groupsize; i++) dgroupranks[i] = i; 1003 if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks)); 1004 
PetscCallMPI(MPI_Group_free(&group)); 1005 PetscCall(PetscFree(dgroupranks)); 1006 } 1007 1008 /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */ 1009 for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) { 1010 for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */ 1011 if (InList(ranks[i], groupsize, groupranks)) break; 1012 } 1013 for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */ 1014 if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break; 1015 } 1016 if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */ 1017 PetscInt tmprank, tmpcount; 1018 1019 tmprank = ranks[i]; 1020 tmpcount = rcount[i]; 1021 ranks[i] = ranks[sf->ndranks]; 1022 rcount[i] = rcount[sf->ndranks]; 1023 ranks[sf->ndranks] = tmprank; 1024 rcount[sf->ndranks] = tmpcount; 1025 sf->ndranks++; 1026 } 1027 } 1028 PetscCall(PetscFree(groupranks)); 1029 PetscCall(PetscSortIntWithArray(sf->ndranks, ranks, rcount)); 1030 PetscCall(PetscSortIntWithArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks)); 1031 sf->roffset[0] = 0; 1032 for (i = 0; i < sf->nranks; i++) { 1033 PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i)); 1034 sf->roffset[i + 1] = sf->roffset[i] + rcount[i]; 1035 rcount[i] = 0; 1036 } 1037 for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) { 1038 /* short circuit */ 1039 if (orank != sf->remote[i].rank) { 1040 /* Search for index of iremote[i].rank in sf->ranks */ 1041 PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->ndranks, sf->ranks, &irank)); 1042 if (irank < 0) { 1043 PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank)); 1044 if (irank >= 0) irank += sf->ndranks; 1045 } 1046 orank = sf->remote[i].rank; 1047 } 1048 PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %" PetscInt_FMT " in array", sf->remote[i].rank); 1049 
sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i; 1050 sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index; 1051 rcount[irank]++; 1052 } 1053 PetscCall(PetscFree2(rcount, ranks)); 1054 PetscFunctionReturn(0); 1055 } 1056 1057 /*@C 1058 PetscSFGetGroups - gets incoming and outgoing process groups 1059 1060 Collective 1061 1062 Input Parameter: 1063 . sf - star forest 1064 1065 Output Parameters: 1066 + incoming - group of origin processes for incoming edges (leaves that reference my roots) 1067 - outgoing - group of destination processes for outgoing edges (roots that I reference) 1068 1069 Level: developer 1070 1071 .seealso: `PetscSFGetWindow()`, `PetscSFRestoreWindow()` 1072 @*/ 1073 PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing) { 1074 MPI_Group group = MPI_GROUP_NULL; 1075 1076 PetscFunctionBegin; 1077 PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups"); 1078 if (sf->ingroup == MPI_GROUP_NULL) { 1079 PetscInt i; 1080 const PetscInt *indegree; 1081 PetscMPIInt rank, *outranks, *inranks; 1082 PetscSFNode *remote; 1083 PetscSF bgcount; 1084 1085 /* Compute the number of incoming ranks */ 1086 PetscCall(PetscMalloc1(sf->nranks, &remote)); 1087 for (i = 0; i < sf->nranks; i++) { 1088 remote[i].rank = sf->ranks[i]; 1089 remote[i].index = 0; 1090 } 1091 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount)); 1092 PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER)); 1093 PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree)); 1094 PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree)); 1095 /* Enumerate the incoming ranks */ 1096 PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks)); 1097 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 1098 for (i = 0; i < sf->nranks; i++) outranks[i] = rank; 1099 
PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks)); 1100 PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks)); 1101 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 1102 PetscCallMPI(MPI_Group_incl(group, indegree[0], inranks, &sf->ingroup)); 1103 PetscCallMPI(MPI_Group_free(&group)); 1104 PetscCall(PetscFree2(inranks, outranks)); 1105 PetscCall(PetscSFDestroy(&bgcount)); 1106 } 1107 *incoming = sf->ingroup; 1108 1109 if (sf->outgroup == MPI_GROUP_NULL) { 1110 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 1111 PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup)); 1112 PetscCallMPI(MPI_Group_free(&group)); 1113 } 1114 *outgoing = sf->outgroup; 1115 PetscFunctionReturn(0); 1116 } 1117 1118 /*@ 1119 PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters 1120 1121 Collective 1122 1123 Input Parameter: 1124 . sf - star forest that may contain roots with 0 or with more than 1 vertex 1125 1126 Output Parameter: 1127 . multi - star forest with split roots, such that each root has degree exactly 1 1128 1129 Level: developer 1130 1131 Notes: 1132 1133 In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi 1134 directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming 1135 edge, it is a candidate for future optimization that might involve its removal. 1136 1137 .seealso: `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()` 1138 @*/ 1139 PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi) { 1140 PetscFunctionBegin; 1141 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1142 PetscValidPointer(multi, 2); 1143 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? 
*/ 1144 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi)); 1145 *multi = sf->multi; 1146 sf->multi->multi = sf->multi; 1147 PetscFunctionReturn(0); 1148 } 1149 if (!sf->multi) { 1150 const PetscInt *indegree; 1151 PetscInt i, *inoffset, *outones, *outoffset, maxlocal; 1152 PetscSFNode *remote; 1153 maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */ 1154 PetscCall(PetscSFComputeDegreeBegin(sf, &indegree)); 1155 PetscCall(PetscSFComputeDegreeEnd(sf, &indegree)); 1156 PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset)); 1157 inoffset[0] = 0; 1158 for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i]; 1159 for (i = 0; i < maxlocal; i++) outones[i] = 1; 1160 PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM)); 1161 PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM)); 1162 for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1163 if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */ 1164 for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp"); 1165 } 1166 PetscCall(PetscMalloc1(sf->nleaves, &remote)); 1167 for (i = 0; i < sf->nleaves; i++) { 1168 remote[i].rank = sf->remote[i].rank; 1169 remote[i].index = outoffset[sf->mine ? 
sf->mine[i] : i]; 1170 } 1171 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi)); 1172 sf->multi->multi = sf->multi; 1173 PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER)); 1174 if (sf->rankorder) { /* Sort the ranks */ 1175 PetscMPIInt rank; 1176 PetscInt *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree; 1177 PetscSFNode *newremote; 1178 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 1179 for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]); 1180 PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset)); 1181 for (i = 0; i < maxlocal; i++) outranks[i] = rank; 1182 PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE)); 1183 PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE)); 1184 /* Sort the incoming ranks at each vertex, build the inverse map */ 1185 for (i = 0; i < sf->nroots; i++) { 1186 PetscInt j; 1187 for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j; 1188 PetscCall(PetscSortIntWithArray(indegree[i], inranks + inoffset[i], tmpoffset)); 1189 for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1190 } 1191 PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE)); 1192 PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE)); 1193 PetscCall(PetscMalloc1(sf->nleaves, &newremote)); 1194 for (i = 0; i < sf->nleaves; i++) { 1195 newremote[i].rank = sf->remote[i].rank; 1196 newremote[i].index = newoutoffset[sf->mine ? 
sf->mine[i] : i]; 1197 } 1198 PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER)); 1199 PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset)); 1200 } 1201 PetscCall(PetscFree3(inoffset, outones, outoffset)); 1202 } 1203 *multi = sf->multi; 1204 PetscFunctionReturn(0); 1205 } 1206 1207 /*@C 1208 PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices 1209 1210 Collective 1211 1212 Input Parameters: 1213 + sf - original star forest 1214 . nselected - number of selected roots on this process 1215 - selected - indices of the selected roots on this process 1216 1217 Output Parameter: 1218 . esf - new star forest 1219 1220 Level: advanced 1221 1222 Note: 1223 To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can 1224 be done by calling PetscSFGetGraph(). 1225 1226 .seealso: `PetscSFSetGraph()`, `PetscSFGetGraph()` 1227 @*/ 1228 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf) { 1229 PetscInt i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal; 1230 const PetscInt *ilocal; 1231 signed char *rootdata, *leafdata, *leafmem; 1232 const PetscSFNode *iremote; 1233 PetscSFNode *new_iremote; 1234 MPI_Comm comm; 1235 1236 PetscFunctionBegin; 1237 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1238 PetscSFCheckGraphSet(sf, 1); 1239 if (nselected) PetscValidIntPointer(selected, 3); 1240 PetscValidPointer(esf, 4); 1241 1242 PetscCall(PetscSFSetUp(sf)); 1243 PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0)); 1244 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1245 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 1246 1247 if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */ 1248 PetscBool dups; 1249 
PetscCall(PetscCheckDupsInt(nselected, selected, &dups)); 1250 PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups"); 1251 for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots); 1252 } 1253 1254 if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf); 1255 else { 1256 /* A generic version of creating embedded sf */ 1257 PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf)); 1258 maxlocal = maxleaf - minleaf + 1; 1259 PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem)); 1260 leafdata = leafmem - minleaf; 1261 /* Tag selected roots and bcast to leaves */ 1262 for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1; 1263 PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1264 PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1265 1266 /* Build esf with leaves that are still connected */ 1267 esf_nleaves = 0; 1268 for (i = 0; i < nleaves; i++) { 1269 j = ilocal ? ilocal[i] : i; 1270 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1271 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1272 */ 1273 esf_nleaves += (leafdata[j] ? 1 : 0); 1274 } 1275 PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal)); 1276 PetscCall(PetscMalloc1(esf_nleaves, &new_iremote)); 1277 for (i = n = 0; i < nleaves; i++) { 1278 j = ilocal ? 
ilocal[i] : i; 1279 if (leafdata[j]) { 1280 new_ilocal[n] = j; 1281 new_iremote[n].rank = iremote[i].rank; 1282 new_iremote[n].index = iremote[i].index; 1283 ++n; 1284 } 1285 } 1286 PetscCall(PetscSFCreate(comm, esf)); 1287 PetscCall(PetscSFSetFromOptions(*esf)); 1288 PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1289 PetscCall(PetscFree2(rootdata, leafmem)); 1290 } 1291 PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0)); 1292 PetscFunctionReturn(0); 1293 } 1294 1295 /*@C 1296 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1297 1298 Collective 1299 1300 Input Parameters: 1301 + sf - original star forest 1302 . nselected - number of selected leaves on this process 1303 - selected - indices of the selected leaves on this process 1304 1305 Output Parameter: 1306 . newsf - new star forest 1307 1308 Level: advanced 1309 1310 .seealso: `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1311 @*/ 1312 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf) { 1313 const PetscSFNode *iremote; 1314 PetscSFNode *new_iremote; 1315 const PetscInt *ilocal; 1316 PetscInt i, nroots, *leaves, *new_ilocal; 1317 MPI_Comm comm; 1318 1319 PetscFunctionBegin; 1320 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1321 PetscSFCheckGraphSet(sf, 1); 1322 if (nselected) PetscValidIntPointer(selected, 3); 1323 PetscValidPointer(newsf, 4); 1324 1325 /* Uniq selected[] and put results in leaves[] */ 1326 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1327 PetscCall(PetscMalloc1(nselected, &leaves)); 1328 PetscCall(PetscArraycpy(leaves, selected, nselected)); 1329 PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves)); 1330 PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT 
"/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves); 1331 1332 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1333 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf); 1334 else { 1335 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote)); 1336 PetscCall(PetscMalloc1(nselected, &new_ilocal)); 1337 PetscCall(PetscMalloc1(nselected, &new_iremote)); 1338 for (i = 0; i < nselected; ++i) { 1339 const PetscInt l = leaves[i]; 1340 new_ilocal[i] = ilocal ? ilocal[l] : l; 1341 new_iremote[i].rank = iremote[l].rank; 1342 new_iremote[i].index = iremote[l].index; 1343 } 1344 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf)); 1345 PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1346 } 1347 PetscCall(PetscFree(leaves)); 1348 PetscFunctionReturn(0); 1349 } 1350 1351 /*@C 1352 PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd() 1353 1354 Collective on PetscSF 1355 1356 Input Parameters: 1357 + sf - star forest on which to communicate 1358 . unit - data type associated with each node 1359 . rootdata - buffer to broadcast 1360 - op - operation to use for reduction 1361 1362 Output Parameter: 1363 . leafdata - buffer to be reduced with values from each leaf's respective root 1364 1365 Level: intermediate 1366 1367 Notes: 1368 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1369 are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1370 use PetscSFBcastWithMemTypeBegin() instead. 
1371 .seealso: `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()` 1372 @*/ 1373 PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) { 1374 PetscMemType rootmtype, leafmtype; 1375 1376 PetscFunctionBegin; 1377 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1378 PetscCall(PetscSFSetUp(sf)); 1379 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1380 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1381 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1382 PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 1383 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1384 PetscFunctionReturn(0); 1385 } 1386 1387 /*@C 1388 PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd() 1389 1390 Collective on PetscSF 1391 1392 Input Parameters: 1393 + sf - star forest on which to communicate 1394 . unit - data type associated with each node 1395 . rootmtype - memory type of rootdata 1396 . rootdata - buffer to broadcast 1397 . leafmtype - memory type of leafdata 1398 - op - operation to use for reduction 1399 1400 Output Parameter: 1401 . 
leafdata - buffer to be reduced with values from each leaf's respective root 1402 1403 Level: intermediate 1404 1405 .seealso: `PetscSFBcastEnd()`, `PetscSFBcastBegin()` 1406 @*/ 1407 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) { 1408 PetscFunctionBegin; 1409 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1410 PetscCall(PetscSFSetUp(sf)); 1411 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1412 PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 1413 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1414 PetscFunctionReturn(0); 1415 } 1416 1417 /*@C 1418 PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin() 1419 1420 Collective 1421 1422 Input Parameters: 1423 + sf - star forest 1424 . unit - data type 1425 . rootdata - buffer to broadcast 1426 - op - operation to use for reduction 1427 1428 Output Parameter: 1429 . leafdata - buffer to be reduced with values from each leaf's respective root 1430 1431 Level: intermediate 1432 1433 .seealso: `PetscSFSetGraph()`, `PetscSFReduceEnd()` 1434 @*/ 1435 PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) { 1436 PetscFunctionBegin; 1437 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1438 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1439 PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op); 1440 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1441 PetscFunctionReturn(0); 1442 } 1443 1444 /*@C 1445 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 1446 1447 Collective 1448 1449 Input Parameters: 1450 + sf - star forest 1451 . 
unit - data type 1452 . leafdata - values to reduce 1453 - op - reduction operation 1454 1455 Output Parameter: 1456 . rootdata - result of reduction of values from all leaves of each root 1457 1458 Level: intermediate 1459 1460 Notes: 1461 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1462 are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1463 use PetscSFReduceWithMemTypeBegin() instead. 1464 1465 .seealso: `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()` 1466 @*/ 1467 PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) { 1468 PetscMemType rootmtype, leafmtype; 1469 1470 PetscFunctionBegin; 1471 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1472 PetscCall(PetscSFSetUp(sf)); 1473 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1474 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1475 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1476 PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1477 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1478 PetscFunctionReturn(0); 1479 } 1480 1481 /*@C 1482 PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd() 1483 1484 Collective 1485 1486 Input Parameters: 1487 + sf - star forest 1488 . unit - data type 1489 . leafmtype - memory type of leafdata 1490 . leafdata - values to reduce 1491 . rootmtype - memory type of rootdata 1492 - op - reduction operation 1493 1494 Output Parameter: 1495 . 
rootdata - result of reduction of values from all leaves of each root 1496 1497 Level: intermediate 1498 1499 .seealso: `PetscSFBcastBegin()`, `PetscSFReduceBegin()` 1500 @*/ 1501 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) { 1502 PetscFunctionBegin; 1503 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1504 PetscCall(PetscSFSetUp(sf)); 1505 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1506 PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1507 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1508 PetscFunctionReturn(0); 1509 } 1510 1511 /*@C 1512 PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin() 1513 1514 Collective 1515 1516 Input Parameters: 1517 + sf - star forest 1518 . unit - data type 1519 . leafdata - values to reduce 1520 - op - reduction operation 1521 1522 Output Parameter: 1523 . 
rootdata - result of reduction of values from all leaves of each root 1524 1525 Level: intermediate 1526 1527 .seealso: `PetscSFSetGraph()`, `PetscSFBcastEnd()` 1528 @*/ 1529 PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) { 1530 PetscFunctionBegin; 1531 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1532 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1533 PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op); 1534 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1535 PetscFunctionReturn(0); 1536 } 1537 1538 /*@C 1539 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd() 1540 1541 Collective 1542 1543 Input Parameters: 1544 + sf - star forest 1545 . unit - data type 1546 . leafdata - leaf values to use in reduction 1547 - op - operation to use for reduction 1548 1549 Output Parameters: 1550 + rootdata - root values to be updated, input state is seen by first process to perform an update 1551 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1552 1553 Level: advanced 1554 1555 Note: 1556 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1557 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1558 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1559 integers. 
1560 1561 .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()` 1562 @*/ 1563 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) { 1564 PetscMemType rootmtype, leafmtype, leafupdatemtype; 1565 1566 PetscFunctionBegin; 1567 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1568 PetscCall(PetscSFSetUp(sf)); 1569 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1570 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1571 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1572 PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype)); 1573 PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1574 PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 1575 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1576 PetscFunctionReturn(0); 1577 } 1578 1579 /*@C 1580 PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd() 1581 1582 Collective 1583 1584 Input Parameters: 1585 + sf - star forest 1586 . unit - data type 1587 . rootmtype - memory type of rootdata 1588 . leafmtype - memory type of leafdata 1589 . leafdata - leaf values to use in reduction 1590 . leafupdatemtype - memory type of leafupdate 1591 - op - operation to use for reduction 1592 1593 Output Parameters: 1594 + rootdata - root values to be updated, input state is seen by first process to perform an update 1595 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1596 1597 Level: advanced 1598 1599 Note: See PetscSFFetchAndOpBegin() for more details. 

.seealso: `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  /* Caller supplies the memory types explicitly, so unlike PetscSFFetchAndOpBegin() no PetscGetMemType() queries are done here */
  PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
  PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
  /* Completes the communication and makes rootdata/leafupdate valid; memory types were captured at Begin time */
  PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1657 1658 .seealso: `PetscSFGatherBegin()` 1659 @*/ 1660 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt **degree) { 1661 PetscFunctionBegin; 1662 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1663 PetscSFCheckGraphSet(sf, 1); 1664 PetscValidPointer(degree, 2); 1665 if (!sf->degreeknown) { 1666 PetscInt i, nroots = sf->nroots, maxlocal; 1667 PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1668 maxlocal = sf->maxleaf - sf->minleaf + 1; 1669 PetscCall(PetscMalloc1(nroots, &sf->degree)); 1670 PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1671 for (i = 0; i < nroots; i++) sf->degree[i] = 0; 1672 for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1; 1673 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 1674 } 1675 *degree = NULL; 1676 PetscFunctionReturn(0); 1677 } 1678 1679 /*@C 1680 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin() 1681 1682 Collective 1683 1684 Input Parameter: 1685 . sf - star forest 1686 1687 Output Parameter: 1688 . degree - degree of each root vertex 1689 1690 Level: developer 1691 1692 Notes: 1693 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 

.seealso: `PetscSFComputeDegreeBegin()`
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscValidPointer(degree, 2);
  if (!sf->degreeknown) {
    /* degreetmp is allocated by PetscSFComputeDegreeBegin(); its absence means Begin was never called */
    PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
    PetscCall(PetscFree(sf->degreetmp));
    /* Cache the result; subsequent Begin/End pairs become no-ops and return the cached array */
    sf->degreeknown = PETSC_TRUE;
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
   Each multi-root is assigned index of the corresponding original root.

   Collective

   Input Parameters:
+  sf - star forest
-  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

   Output Parameters:
+  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
-  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

   Level: developer

   Notes:
   The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1730 1731 .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()` 1732 @*/ 1733 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) { 1734 PetscSF msf; 1735 PetscInt i, j, k, nroots, nmroots; 1736 1737 PetscFunctionBegin; 1738 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1739 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL)); 1740 if (nroots) PetscValidIntPointer(degree, 2); 1741 if (nMultiRoots) PetscValidIntPointer(nMultiRoots, 3); 1742 PetscValidPointer(multiRootsOrigNumbering, 4); 1743 PetscCall(PetscSFGetMultiSF(sf, &msf)); 1744 PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL)); 1745 PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering)); 1746 for (i = 0, j = 0, k = 0; i < nroots; i++) { 1747 if (!degree[i]) continue; 1748 for (j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i; 1749 } 1750 PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail"); 1751 if (nMultiRoots) *nMultiRoots = nmroots; 1752 PetscFunctionReturn(0); 1753 } 1754 1755 /*@C 1756 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 1757 1758 Collective 1759 1760 Input Parameters: 1761 + sf - star forest 1762 . unit - data type 1763 - leafdata - leaf data to gather to roots 1764 1765 Output Parameter: 1766 . 
   multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
@*/
PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) {
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  /* A gather is a reduction with MPI_REPLACE over the multi-SF, where each leaf has its own root slot */
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Parameter:
.  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
@*/
PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) {
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Parameter:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) {
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  /* A scatter is a broadcast over the multi-SF: each leaf receives its personal slot of the multi-root buffer */
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Parameter:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) {
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
  PetscFunctionReturn(0);
}

/* Verify (in debug builds only) that no leaf index appears twice; a true star forest
   requires each leaf to be connected to exactly one root. Uses a hash set of seen indices. */
static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) {
  PetscInt        i, n, nleaves;
  const PetscInt *ilocal = NULL;
  PetscHSetI      seen;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) {
    PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
    PetscCall(PetscHSetICreate(&seen));
    for (i = 0; i < nleaves; i++) {
      const PetscInt leaf = ilocal ? ilocal[i] : i; /* ilocal == NULL means contiguous leaves 0..nleaves-1 */
      PetscCall(PetscHSetIAdd(seen, leaf));
    }
    /* If any index repeated, the set is smaller than the leaf count */
    PetscCall(PetscHSetIGetSize(seen, &n));
    PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
    PetscCall(PetscHSetIDestroy(&seen));
  }
  PetscFunctionReturn(0);
}

/*@
  PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots.

  sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
  nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
  Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA) {
  const PetscSFNode *remotePointsA, *remotePointsB;
  PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
  const PetscInt    *localPointsA, *localPointsB;
  PetscInt          *localPointsBA;
  PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
  PetscBool          denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sfA, 1);
  PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
  PetscSFCheckGraphSet(sfB, 2);
  PetscCheckSameComm(sfA, 1, sfB, 2);
  PetscValidPointer(sfBA, 3);
  /* Both inputs must be true star forests (unique leaves); checked only in debug builds */
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
  /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size      */
  /* numRootsB; otherwise, garbage will be broadcasted.                                  */
  /* Example (comm size = 1):                                                            */
  /* sfA: 0 <- (0, 0)                                                                    */
  /* sfB: 100 <- (0, 0)                                                                  */
  /*      101 <- (0, 1)                                                                  */
  /* Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target  */
  /* of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would  */
  /* receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on */
  /* remotePointsA; if not recast, point 101 would receive a garbage value.              */
  PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
  for (i = 0; i < numRootsB; i++) {
    /* rank == -1 marks "no connection"; it survives the broadcast for unconnected points */
    reorderedRemotePointsA[i].rank  = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  for (i = 0; i < numLeavesA; i++) {
    PetscInt localp = localPointsA ? localPointsA[i] : i;

    if (localp >= numRootsB) continue; /* leaf of sfA outside sfB's root space cannot connect */
    reorderedRemotePointsA[localp] = remotePointsA[i];
  }
  remotePointsA = reorderedRemotePointsA;
  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
  for (i = 0; i < maxleaf - minleaf + 1; i++) {
    leafdataB[i].rank  = -1;
    leafdataB[i].index = -1;
  }
  /* Push sfA's (rank, index) root locations through sfB down to its leaves */
  PetscCall(PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE));
  PetscCall(PetscFree(reorderedRemotePointsA));

  /* denseB: every leaf of sfB is connected, so the composed SF can keep contiguous leaf storage */
  denseB = (PetscBool)!localPointsB;
  for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    /* leafdataB ownership transfers to the new SF below (PETSC_OWN_POINTER); do not free it here */
    localPointsBA  = NULL;
    remotePointsBA = leafdataB;
  } else {
    /* Compress out unconnected leaves into freshly allocated arrays */
    PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
    PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
    for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l - minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
      localPointsBA[numLeavesBA]  = l;
      numLeavesBA++;
    }
    PetscCall(PetscFree(leafdataB));
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
  PetscFunctionReturn(0);
}

/*@
  PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF.

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
  second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

  sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
  roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
  a Reduce on sfB, on connected roots.

.seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA) {
  const PetscSFNode *remotePointsA, *remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA, *localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
  MPI_Op             op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sfA, 1);
  PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
  PetscSFCheckGraphSet(sfB, 2);
  PetscCheckSameComm(sfA, 1, sfB, 2);
  PetscValidPointer(sfBA, 3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
  if (iswin) op = MPI_REPLACE;
#endif

  /* Scatter sfA's remote root locations into a leaf-range-indexed buffer over sfB's leaf space */
  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
  for (i = 0; i < maxleaf - minleaf + 1; i++) {
    /* rank == -1 marks "no connection" */
    reorderedRemotePointsA[i].rank  = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i = 0; i < numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    for (i = 0; i < numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
  PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
  for (i = 0; i < numRootsB; i++) {
    remotePointsBA[i].rank  = -1;
    remotePointsBA[i].index = -1;
  }

  /* Reduce over sfB pulls each (rank, index) pair up to sfB's roots, i.e. composes sfA with sfB^{-1} */
  PetscCall(PetscSFReduceBegin(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op));
  PetscCall(PetscSFReduceEnd(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op));
  PetscCall(PetscFree(reorderedRemotePointsA));
  /* Compress out roots of sfB that received nothing (rank still -1); compaction is done in place */
  for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA]        = i;
    numLeavesBA++;
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
  PetscFunctionReturn(0);
}

/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
. out - The local PetscSF
 */
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out) {
  MPI_Comm           comm;
  PetscMPIInt        myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
  PetscSFNode       *liremote;
  PetscSF            lsf;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
  else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
    PetscCallMPI(MPI_Comm_rank(comm, &myrank));

    /* Find out local edges and build a local SF */
    PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
    /* First pass: count edges whose root lives on this rank */
    for (i = lnleaves = 0; i < nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) lnleaves++;
    }
    PetscCall(PetscMalloc1(lnleaves, &lilocal));
    PetscCall(PetscMalloc1(lnleaves, &liremote));

    /* Second pass: copy the local edges, remapping the root rank to 0 (the only rank on PETSC_COMM_SELF) */
    for (i = j = 0; i < nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
    PetscCall(PetscSFSetFromOptions(lsf));
    PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(lsf));
    *out = lsf;
  }
  PetscFunctionReturn(0);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata) {
  PetscMemType rootmtype, leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  /* Logged under the generic BcastBegin event since no dedicated event exists for this private path */
  PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscCall(PetscGetMemType(rootdata, &rootmtype));
  PetscCall(PetscGetMemType(leafdata, &leafmtype));
  PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
  PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@
  PetscSFConcatenate - concatenate multiple SFs into one

  Input Parameters:
+ comm - the communicator
. nsfs - the number of input PetscSF
. sfs - the array of input PetscSF
. shareRoots - the flag whether roots of input PetscSFs are taken as shared (PETSC_TRUE), or separate and concatenated (PETSC_FALSE)
- leafOffsets - the array of local leaf offsets, one for each input PetscSF, or NULL for contiguous storage

  Output Parameters:
. newsf - The resulting PetscSF

  Level: developer

  Notes:
  The communicator of all SFs in sfs must be comm.

  The offsets in leafOffsets are added to the original leaf indices.

  If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well.
  In this case, NULL is also passed as ilocal to the resulting SF.

  If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs.
  In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs).

.seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscBool shareRoots, PetscInt leafOffsets[], PetscSF *newsf) {
  PetscInt     i, s, nLeaves, nRoots;
  PetscInt    *leafArrayOffsets; /* prefix sums of per-SF leaf counts, indexing the concatenated arrays */
  PetscInt    *ilocal_new;
  PetscSFNode *iremote_new;
  PetscInt    *rootOffsets; /* prefix sums of per-SF root counts; all zero when shareRoots */
  PetscBool    all_ilocal_null = PETSC_FALSE;
  PetscMPIInt  rank;

  PetscFunctionBegin;
  {
    PetscSF dummy; /* just to have a PetscObject on comm for input validation */

    PetscCall(PetscSFCreate(comm, &dummy));
    PetscValidLogicalCollectiveInt(dummy, nsfs, 2);
    PetscValidPointer(sfs, 3);
    for (i = 0; i < nsfs; i++) {
      PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3);
      PetscCheckSameComm(dummy, 1, sfs[i], 3);
    }
    PetscValidLogicalCollectiveBool(dummy, shareRoots, 4);
    if (leafOffsets) PetscValidIntPointer(leafOffsets, 5);
    PetscValidPointer(newsf, 6);
    PetscCall(PetscSFDestroy(&dummy));
  }
  /* Trivial case: no inputs yields an empty SF */
  if (!nsfs) {
    PetscCall(PetscSFCreate(comm, newsf));
    PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscFunctionReturn(0);
  }
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  PetscCall(PetscCalloc1(nsfs + 1, &rootOffsets));
  if (shareRoots) {
    PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
    if (PetscDefined(USE_DEBUG)) {
      /* Shared roots require every input SF to have the same local root count */
      for (s = 1; s < nsfs; s++) {
        PetscInt nr;

        PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
        PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "shareRoots = PETSC_TRUE but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", s, nr, nRoots);
      }
    }
  } else {
    for (s = 0; s < nsfs; s++) {
      PetscInt nr;

      PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
      rootOffsets[s + 1] = rootOffsets[s] + nr;
    }
    nRoots = rootOffsets[nsfs];
  }

  /* Calculate leaf array offsets and automatic root offsets */
  PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
  leafArrayOffsets[0] = 0;
  for (s = 0; s < nsfs; s++) {
    PetscInt nl;

    PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
    leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
  }
  nLeaves = leafArrayOffsets[nsfs];

  /* leafOffsets == NULL is only legal when every input SF uses contiguous leaves (ilocal == NULL);
     in that case the result also keeps ilocal == NULL */
  if (!leafOffsets) {
    all_ilocal_null = PETSC_TRUE;
    for (s = 0; s < nsfs; s++) {
      const PetscInt *ilocal;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
      if (ilocal) {
        all_ilocal_null = PETSC_FALSE;
        break;
      }
    }
    PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
  }

  /* Renumber and concatenate local leaves */
  ilocal_new = NULL;
  if (!all_ilocal_null) {
    /* Note: this branch implies leafOffsets != NULL, so dereferencing leafOffsets[s] below is safe */
    PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
    for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
    for (s = 0; s < nsfs; s++) {
      const PetscInt *ilocal;
      PetscInt       *ilocal_l = &ilocal_new[leafArrayOffsets[s]];
      PetscInt        i, nleaves_l;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
      for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
    }
  }

  /* Renumber and concatenate remote roots */
  PetscCall(PetscMalloc1(nLeaves, &iremote_new));
  for (i = 0; i < nLeaves; i++) {
    iremote_new[i].rank  = -1;
    iremote_new[i].index = -1;
  }
  for (s = 0; s < nsfs; s++) {
    PetscInt           i, nl, nr;
    PetscSF            tmp_sf;
    const PetscSFNode *iremote;
    PetscSFNode       *tmp_rootdata;
    PetscSFNode       *tmp_leafdata = &iremote_new[leafArrayOffsets[s]];

    PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
    PetscCall(PetscSFCreate(comm, &tmp_sf));
    /* create helper SF with contiguous leaves */
    PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
    PetscCall(PetscSFSetUp(tmp_sf));
    PetscCall(PetscMalloc1(nr, &tmp_rootdata));
    /* Each root broadcasts its (new global index, owning rank) so leaves learn their renumbered roots */
    for (i = 0; i < nr; i++) {
      tmp_rootdata[i].index = i + rootOffsets[s];
      tmp_rootdata[i].rank  = (PetscInt)rank;
    }
    PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
    PetscCall(PetscSFDestroy(&tmp_sf));
    PetscCall(PetscFree(tmp_rootdata));
  }

  /* Build the new SF */
  PetscCall(PetscSFCreate(comm, newsf));
  PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetUp(*newsf));
  PetscCall(PetscFree(rootOffsets));
  PetscCall(PetscFree(leafArrayOffsets));
  PetscFunctionReturn(0);
}