1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/hashseti.h> 3 #include <petsc/private/viewerimpl.h> 4 #include <petscctable.h> 5 6 #if defined(PETSC_HAVE_CUDA) 7 #include <cuda_runtime.h> 8 #endif 9 10 #if defined(PETSC_HAVE_HIP) 11 #include <hip/hip_runtime.h> 12 #endif 13 14 #if defined(PETSC_CLANG_STATIC_ANALYZER) 15 void PetscSFCheckGraphSet(PetscSF, int); 16 #else 17 #if defined(PETSC_USE_DEBUG) 18 #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME); 19 #else 20 #define PetscSFCheckGraphSet(sf, arg) \ 21 do { \ 22 } while (0) 23 #endif 24 #endif 25 26 const char *const PetscSFDuplicateOptions[] = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL}; 27 28 /*@ 29 PetscSFCreate - create a star forest communication context 30 31 Collective 32 33 Input Parameter: 34 . comm - communicator on which the star forest will operate 35 36 Output Parameter: 37 . sf - new star forest context 38 39 Options Database Keys: 40 + -sf_type basic -Use MPI persistent Isend/Irecv for communication (Default) 41 . -sf_type window -Use MPI-3 one-sided window for communication 42 - -sf_type neighbor -Use MPI-3 neighborhood collectives for communication 43 44 Level: intermediate 45 46 Notes: 47 When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv, 48 MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special 49 SFs are optimized and they have better performance than general SFs. 
50 51 .seealso: `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()` 52 @*/ 53 PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf) { 54 PetscSF b; 55 56 PetscFunctionBegin; 57 PetscValidPointer(sf, 2); 58 PetscCall(PetscSFInitializePackage()); 59 60 PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView)); 61 62 b->nroots = -1; 63 b->nleaves = -1; 64 b->minleaf = PETSC_MAX_INT; 65 b->maxleaf = PETSC_MIN_INT; 66 b->nranks = -1; 67 b->rankorder = PETSC_TRUE; 68 b->ingroup = MPI_GROUP_NULL; 69 b->outgroup = MPI_GROUP_NULL; 70 b->graphset = PETSC_FALSE; 71 #if defined(PETSC_HAVE_DEVICE) 72 b->use_gpu_aware_mpi = use_gpu_aware_mpi; 73 b->use_stream_aware_mpi = PETSC_FALSE; 74 b->unknown_input_stream = PETSC_FALSE; 75 #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 76 b->backend = PETSCSF_BACKEND_KOKKOS; 77 #elif defined(PETSC_HAVE_CUDA) 78 b->backend = PETSCSF_BACKEND_CUDA; 79 #elif defined(PETSC_HAVE_HIP) 80 b->backend = PETSCSF_BACKEND_HIP; 81 #endif 82 83 #if defined(PETSC_HAVE_NVSHMEM) 84 b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */ 85 b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */ 86 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL)); 87 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL)); 88 #endif 89 #endif 90 b->vscat.from_n = -1; 91 b->vscat.to_n = -1; 92 b->vscat.unit = MPIU_SCALAR; 93 *sf = b; 94 PetscFunctionReturn(0); 95 } 96 97 /*@ 98 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 99 100 Collective 101 102 Input Parameter: 103 . 
 sf - star forest

  Level: advanced

.seealso: `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
@*/
PetscErrorCode PetscSFReset(PetscSF sf) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* Let the implementation (basic/window/...) release its own data first */
  PetscTryTypeMethod(sf, Reset);
  /* Invalidate the graph: sizes negative, leaf range empty (min > max) */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  /* Free the owned copies; mine/remote above may have aliased these allocations */
  PetscCall(PetscFree(sf->mine_alloc));
  PetscCall(PetscFree(sf->remote_alloc));
  sf->nranks = -1;
  PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
  sf->degreeknown = PETSC_FALSE;
  PetscCall(PetscFree(sf->degree));
  if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
  if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
  /* Break the back-pointer before destroying the multi-SF to avoid a dangling reference */
  if (sf->multi) sf->multi->multi = NULL;
  PetscCall(PetscSFDestroy(&sf->multi));
  PetscCall(PetscLayoutDestroy(&sf->map));

#if defined(PETSC_HAVE_DEVICE)
  /* Release device-side leaf index buffers (one per direction) */
  for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
#endif

  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@C
  PetscSFSetType - Set the PetscSF communication implementation

  Collective on PetscSF

  Input Parameters:
+ sf - the PetscSF context
- type - a known method

  Options Database Key:
.
-sf_type <type> - Sets the method; use -help for a list 151 of available methods (for instance, window, basic, neighbor) 152 153 Notes: 154 See "include/petscsf.h" for available methods (for instance) 155 + PETSCSFWINDOW - MPI-2/3 one-sided 156 - PETSCSFBASIC - basic implementation using MPI-1 two-sided 157 158 Level: intermediate 159 160 .seealso: `PetscSFType`, `PetscSFCreate()` 161 @*/ 162 PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type) { 163 PetscBool match; 164 PetscErrorCode (*r)(PetscSF); 165 166 PetscFunctionBegin; 167 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 168 PetscValidCharPointer(type, 2); 169 170 PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match)); 171 if (match) PetscFunctionReturn(0); 172 173 PetscCall(PetscFunctionListFind(PetscSFList, type, &r)); 174 PetscCheck(r, PETSC_COMM_SELF, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type); 175 /* Destroy the previous PetscSF implementation context */ 176 PetscTryTypeMethod(sf, Destroy); 177 PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops))); 178 PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type)); 179 PetscCall((*r)(sf)); 180 PetscFunctionReturn(0); 181 } 182 183 /*@C 184 PetscSFGetType - Get the PetscSF communication implementation 185 186 Not Collective 187 188 Input Parameter: 189 . sf - the PetscSF context 190 191 Output Parameter: 192 . type - the PetscSF type name 193 194 Level: intermediate 195 196 .seealso: `PetscSFSetType()`, `PetscSFCreate()` 197 @*/ 198 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) { 199 PetscFunctionBegin; 200 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 201 PetscValidPointer(type, 2); 202 *type = ((PetscObject)sf)->type_name; 203 PetscFunctionReturn(0); 204 } 205 206 /*@C 207 PetscSFDestroy - destroy star forest 208 209 Collective 210 211 Input Parameter: 212 . 
 sf - address of star forest

  Level: intermediate

.seealso: `PetscSFCreate()`, `PetscSFReset()`
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf) {
  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf), PETSCSF_CLASSID, 1);
  /* Only the last reference actually frees the object */
  if (--((PetscObject)(*sf))->refct > 0) {
    *sf = NULL;
    PetscFunctionReturn(0);
  }
  PetscCall(PetscSFReset(*sf));
  PetscTryTypeMethod((*sf), Destroy);
  PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
  /* A blocked unit was created with MPI_Type_dup/contiguous and must be freed;
     bs == 1 means unit is the built-in MPIU_SCALAR, which must not be freed */
  if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
  PetscCall(PetscHeaderDestroy(sf));
  PetscFunctionReturn(0);
}

/* Debug-only validation that every edge of the graph is in range:
   ranks in [0, comm size), root indices >= 0, leaf locations >= 0.
   No-op in optimized builds or when no graph is set. */
static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) {
  PetscInt i, nleaves;
  PetscMPIInt size;
  const PetscInt *ilocal;
  const PetscSFNode *iremote;

  PetscFunctionBegin;
  if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
  PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
  for (i = 0; i < nleaves; i++) {
    const PetscInt rank = iremote[i].rank;
    const PetscInt remote = iremote[i].index;
    /* NULL ilocal means contiguous leaves: leaf i lives at location i */
    const PetscInt leaf = ilocal ? ilocal[i] : i;
    PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
    PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
    PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
  }
  PetscFunctionReturn(0);
}

/*@
  PetscSFSetUp - set up communication structures

  Collective

  Input Parameter:
.
sf - star forest communication object 262 263 Level: beginner 264 265 .seealso: `PetscSFSetFromOptions()`, `PetscSFSetType()` 266 @*/ 267 PetscErrorCode PetscSFSetUp(PetscSF sf) { 268 PetscFunctionBegin; 269 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 270 PetscSFCheckGraphSet(sf, 1); 271 if (sf->setupcalled) PetscFunctionReturn(0); 272 PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0)); 273 PetscCall(PetscSFCheckGraphValid_Private(sf)); 274 if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */ 275 PetscTryTypeMethod(sf, SetUp); 276 #if defined(PETSC_HAVE_CUDA) 277 if (sf->backend == PETSCSF_BACKEND_CUDA) { 278 sf->ops->Malloc = PetscSFMalloc_CUDA; 279 sf->ops->Free = PetscSFFree_CUDA; 280 } 281 #endif 282 #if defined(PETSC_HAVE_HIP) 283 if (sf->backend == PETSCSF_BACKEND_HIP) { 284 sf->ops->Malloc = PetscSFMalloc_HIP; 285 sf->ops->Free = PetscSFFree_HIP; 286 } 287 #endif 288 289 # 290 #if defined(PETSC_HAVE_KOKKOS) 291 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 292 sf->ops->Malloc = PetscSFMalloc_Kokkos; 293 sf->ops->Free = PetscSFFree_Kokkos; 294 } 295 #endif 296 PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0)); 297 sf->setupcalled = PETSC_TRUE; 298 PetscFunctionReturn(0); 299 } 300 301 /*@ 302 PetscSFSetFromOptions - set PetscSF options using the options database 303 304 Logically Collective 305 306 Input Parameter: 307 . sf - star forest 308 309 Options Database Keys: 310 + -sf_type - implementation type, see PetscSFSetType() 311 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 312 . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also 313 use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true). 314 If true, this option only works with -use_gpu_aware_mpi 1. 315 . 
-sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false). 316 If true, this option only works with -use_gpu_aware_mpi 1. 317 318 - -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos. 319 On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices, 320 the only available is kokkos. 321 322 Level: intermediate 323 @*/ 324 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) { 325 PetscSFType deft; 326 char type[256]; 327 PetscBool flg; 328 329 PetscFunctionBegin; 330 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 331 PetscObjectOptionsBegin((PetscObject)sf); 332 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 333 PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg)); 334 PetscCall(PetscSFSetType(sf, flg ? 
type : deft)); 335 PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL)); 336 #if defined(PETSC_HAVE_DEVICE) 337 { 338 char backendstr[32] = {0}; 339 PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set; 340 /* Change the defaults set in PetscSFCreate() with command line options */ 341 PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL)); 342 PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL)); 343 PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set)); 344 PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda)); 345 PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos)); 346 PetscCall(PetscStrcasecmp("hip", backendstr, &isHip)); 347 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 348 if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 349 else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 350 else if (isHip) sf->backend = PETSCSF_BACKEND_HIP; 351 else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr); 352 #elif defined(PETSC_HAVE_KOKKOS) 353 PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. 
You can only choose kokkos", backendstr); 354 #endif 355 } 356 #endif 357 PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject); 358 PetscOptionsEnd(); 359 PetscFunctionReturn(0); 360 } 361 362 /*@ 363 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 364 365 Logically Collective 366 367 Input Parameters: 368 + sf - star forest 369 - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic) 370 371 Level: advanced 372 373 .seealso: `PetscSFGatherBegin()`, `PetscSFScatterBegin()` 374 @*/ 375 PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg) { 376 PetscFunctionBegin; 377 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 378 PetscValidLogicalCollectiveBool(sf, flg, 2); 379 PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 380 sf->rankorder = flg; 381 PetscFunctionReturn(0); 382 } 383 384 /*@C 385 PetscSFSetGraph - Set a parallel star forest 386 387 Collective 388 389 Input Parameters: 390 + sf - star forest 391 . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 392 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 393 . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced 394 during setup in debug mode) 395 . localmode - copy mode for ilocal 396 . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 397 during setup in debug mode) 398 - remotemode - copy mode for iremote 399 400 Level: intermediate 401 402 Notes: 403 Leaf indices in ilocal must be unique, otherwise an error occurs. 404 405 Input arrays ilocal and iremote follow the PetscCopyMode semantics. 
  In particular, if localmode/remotemode is PETSC_OWN_POINTER or PETSC_USE_POINTER,
  PETSc might modify the respective array;
  if PETSC_USE_POINTER, the user must delete the array after PetscSFDestroy().
  Only if PETSC_COPY_VALUES is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).

  Fortran Notes:
  In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode.

  Developer Notes:
  We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
  This also allows to compare leaf sets of two SFs easily.

.seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode) {
  PetscBool unique, contiguous;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal, 4);
  if (nleaves > 0) PetscValidPointer(iremote, 6);
  PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
  PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
  /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
   * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
  PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
  PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    PetscCall(PetscSFReset(sf));
  }

  PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));

  sf->nroots = nroots;
  sf->nleaves = nleaves;

  /* With PETSC_COPY_VALUES we work on private copies so the caller's arrays stay intact
     even though we sort below */
  if (localmode == PETSC_COPY_VALUES && ilocal) {
    PetscInt *tlocal = NULL;

    PetscCall(PetscMalloc1(nleaves, &tlocal));
    PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
    ilocal = tlocal;
  }
  if (remotemode == PETSC_COPY_VALUES) {
    PetscSFNode *tremote = NULL;

    PetscCall(PetscMalloc1(nleaves, &tremote));
    PetscCall(PetscArraycpy(tremote, iremote, nleaves));
    iremote = tremote;
  }

  if (nleaves && ilocal) {
    PetscSFNode work;

    /* Sort leaves by location, permuting iremote alongside; this determines
       minleaf/maxleaf and lets us detect duplicates and contiguity */
    PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
    PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
    unique = PetscNot(unique); /* PetscSortedCheckDupsInt() returns "has dups"; flip to "is unique" */
    PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
    sf->minleaf = ilocal[0];
    sf->maxleaf = ilocal[nleaves - 1];
    contiguous = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
  } else {
    /* NULL ilocal already means contiguous leaves [0, nleaves) */
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
    unique = PETSC_TRUE;
    contiguous = PETSC_TRUE;
  }

  /* A contiguous ilocal carries no information; drop it (PetscFree() also NULLs the pointer) */
  if (contiguous) {
    if (localmode == PETSC_USE_POINTER) {
      ilocal = NULL;
    } else {
      PetscCall(PetscFree(ilocal));
    }
  }
  sf->mine = ilocal;
  /* mine_alloc/remote_alloc track ownership: NULL when the user retains the array */
  if (localmode == PETSC_USE_POINTER) {
    sf->mine_alloc = NULL;
  } else {
    sf->mine_alloc = ilocal;
  }
  sf->remote = iremote;
  if (remotemode == PETSC_USE_POINTER) {
    sf->remote_alloc = NULL;
  } else {
    sf->remote_alloc = iremote;
  }
  PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
  PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

  Collective

  Input Parameters:
+ sf - The PetscSF
. map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
- pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

  Notes:
  It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
  n and N are local and global sizes of x respectively.

  With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
  sequential vectors y on all ranks.

  With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
  sequential vector y on rank 0.

  In above cases, entries of x are roots and entries of y are leaves.

  With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
  creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
  of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
  not mean one can not send multiple items. One just needs to create a new MPI datatype for the multiple data
  items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

  In this case, roots and leaves are symmetric.

  Level: intermediate
@*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern) {
  MPI_Comm comm;
  PetscInt n, N, res[2];
  PetscMPIInt rank, size;
  PetscSFType type;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* map is ignored for ALLTOALL, so it is only validated for the other patterns */
  if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map, 2);
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* Build the implied layout: size roots per rank, size*size roots globally */
    PetscCall(PetscLayoutCreate(comm, &sf->map));
    PetscCall(PetscLayoutSetLocalSize(sf->map, size));
    PetscCall(PetscLayoutSetSize(sf->map, ((PetscInt)size) * size));
    PetscCall(PetscLayoutSetUp(sf->map));
  } else {
    PetscCall(PetscLayoutGetLocalSize(map, &n));
    PetscCall(PetscLayoutGetSize(map, &N));
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it:
       max(n) == -max(-n) == min(n) iff all local sizes are equal */
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    PetscCall(PetscLayoutReference(map, &sf->map));
  }
  PetscCall(PetscSFSetType(sf, type));

  sf->pattern = pattern;
  sf->mine = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
  */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots = n;
    sf->nranks = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves in a gather */
    sf->nleaves = rank ? 0 : N;
    sf->nroots = n;
    sf->nranks = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves = size;
    sf->nroots = size;
    sf->nranks = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
  PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

  Collective

  Input Parameter:
. sf - star forest to invert

  Output Parameter:
. isf - inverse of sf

  Level: advanced

  Notes:
  All roots must have degree 1.

  The local space may be a permutation, but cannot be sparse.

.seealso: `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf) {
  PetscMPIInt rank;
  PetscInt i, nroots, nleaves, maxlocal, count, *newilocal;
  const PetscInt *ilocal;
  PetscSFNode *roots, *leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscValidPointer(isf, 2);

  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
  maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
  PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
  /* Each leaf advertises its own (rank, location) ... */
  for (i = 0; i < maxlocal; i++) {
    leaves[i].rank = rank;
    leaves[i].index = i;
  }
  /* ... and roots start out unreferenced (-1 marks "no leaf points here") */
  for (i = 0; i < nroots; i++) {
    roots[i].rank = -1;
    roots[i].index = -1;
  }
  /* Reduce leaf->root: afterwards roots[r] holds the (rank, leaf location) that targets root r */
  PetscCall(PetscSFReduceBegin(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));

  /* Check whether our leaves are sparse */
  for (i = 0, count = 0; i < nroots; i++)
    if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL; /* every root referenced: inverse leaves are contiguous */
  else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    PetscCall(PetscMalloc1(count, &newilocal));
    for (i = 0, count = 0; i < nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count] = i;
        roots[count].rank = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
  /* newilocal ownership moves to isf (PETSC_OWN_POINTER); roots is copied, so PetscFree2 below is safe */
  PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
  PetscCall(PetscFree2(roots, leaves));
  PetscFunctionReturn(0);
}

/*@
  PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

  Collective

  Input Parameters:
+ sf - communication object to duplicate
- opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

  Output Parameter:
. newsf - new communication object

  Level: beginner

.seealso: `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf) {
  PetscSFType type;
  MPI_Datatype dtype = MPIU_SCALAR;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveEnum(sf, opt, 2);
  PetscValidPointer(newsf, 3);
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
  PetscCall(PetscSFGetType(sf, &type));
  if (type) PetscCall(PetscSFSetType(*newsf, type));
  (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf, 1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt nroots, nleaves;
      const PetscInt *ilocal;
      const PetscSFNode *iremote;
      /* Casts drop const because PetscSFSetGraph() takes non-const arrays;
         PETSC_COPY_VALUES guarantees the originals are not modified */
      PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
      PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
    } else {
      PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
    }
  }
  /* Since oldtype is committed, so is newtype, according to MPI */
  if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
  (*newsf)->vscat.bs = sf->vscat.bs;
  (*newsf)->vscat.unit = dtype;
  (*newsf)->vscat.to_n = sf->vscat.to_n;
  (*newsf)->vscat.from_n = sf->vscat.from_n;
  /* Do not copy lsf. Build it on demand since it is rarely used */

#if defined(PETSC_HAVE_DEVICE)
  (*newsf)->backend = sf->backend;
  (*newsf)->unknown_input_stream = sf->unknown_input_stream;
  (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(0);
}

/*@C
  PetscSFGetGraph - Get the graph specifying a parallel star forest

  Not Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
. nleaves - number of leaf vertices on the current process, each of these references a root on any process
. ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
- iremote - remote locations of root vertices for each leaf on the current process

  Notes:
  We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet

  The returned ilocal/iremote might contain values in different order than the input ones in PetscSFSetGraph(),
  see its manpage for details.

  Fortran Notes:
  The returned iremote array is a copy and must be deallocated after use.
Consequently, if you 746 want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array. 747 748 To check for a NULL ilocal use 749 $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then 750 751 Level: intermediate 752 753 .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()` 754 @*/ 755 PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote) { 756 PetscFunctionBegin; 757 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 758 if (sf->ops->GetGraph) { 759 PetscCall((sf->ops->GetGraph)(sf, nroots, nleaves, ilocal, iremote)); 760 } else { 761 if (nroots) *nroots = sf->nroots; 762 if (nleaves) *nleaves = sf->nleaves; 763 if (ilocal) *ilocal = sf->mine; 764 if (iremote) *iremote = sf->remote; 765 } 766 PetscFunctionReturn(0); 767 } 768 769 /*@ 770 PetscSFGetLeafRange - Get the active leaf ranges 771 772 Not Collective 773 774 Input Parameter: 775 . sf - star forest 776 777 Output Parameters: 778 + minleaf - minimum active leaf on this process. Return 0 if there are no leaves. 779 - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves. 780 781 Level: developer 782 783 .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 784 @*/ 785 PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf) { 786 PetscFunctionBegin; 787 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 788 PetscSFCheckGraphSet(sf, 1); 789 if (minleaf) *minleaf = sf->minleaf; 790 if (maxleaf) *maxleaf = sf->maxleaf; 791 PetscFunctionReturn(0); 792 } 793 794 /*@C 795 PetscSFViewFromOptions - View from Options 796 797 Collective on PetscSF 798 799 Input Parameters: 800 + A - the star forest 801 . 
 obj - Optional object
- name - command line option

  Level: intermediate
.seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[]) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1);
  PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
  PetscFunctionReturn(0);
}

/*@C
  PetscSFView - view a star forest

  Collective

  Input Parameters:
+ sf - star forest
- viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

  Level: beginner

.seealso: `PetscSFCreate()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer) {
  PetscBool iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  PetscCheckSameComm(sf, 1, viewer, 2);
  /* Viewing needs rank/offset tables, so finish setup first (only possible with a graph) */
  if (sf->graphset) PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt ii, i, j;

    PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
    PetscCall(PetscViewerASCIIPushTab(viewer));
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
        PetscCall(PetscViewerASCIIPopTab(viewer));
        PetscFunctionReturn(0);
      }
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
      /* Synchronized printing keeps per-rank output in rank order */
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n", rank, sf->nroots, sf->nleaves, sf->nranks));
      for (i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerGetFormat(viewer, &format));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks, *perm;
        /* Sort a copy of the rank list and keep the permutation so edges print by rank order */
        PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
        PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
        for (i = 0; i < sf->nranks; i++) perm[i] = i;
        PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
        for (ii = 0; ii < sf->nranks; ii++) {
          i = perm[ii];
          PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
          for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
        }
        PetscCall(PetscFree2(tmpranks, perm));
      }
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
    }
    PetscCall(PetscViewerASCIIPopTab(viewer));
  }
  /* Let the implementation append type-specific information */
  PetscTryTypeMethod(sf, View, viewer);
  PetscFunctionReturn(0);
}

/*@C
  PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

  Not Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ nranks - number of ranks referenced by local part
. ranks - array of ranks
. roffset - offset in rmine/rremote for each rank (length nranks+1)
.
rmine - concatenated array holding local indices referencing each remote rank
-  rremote - concatenated array holding remote indices referenced for each remote rank

   Level: developer

.seealso: `PetscSFGetLeafRanks()`
@*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetRootRanks) {
    PetscCall((sf->ops->GetRootRanks)(sf, nranks, ranks, roffset, rmine, rremote));
  } else {
    /* The generic implementation */
    if (nranks) *nranks = sf->nranks;
    if (ranks) *ranks = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine) *rmine = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

   Not Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  niranks - number of leaf ranks referencing roots on this process
.  iranks - array of ranks
.  ioffset - offset in irootloc for each rank (length niranks+1)
-  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

   Level: developer

.seealso: `PetscSFGetRootRanks()`
@*/
PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetLeafRanks) {
    PetscCall((sf->ops->GetLeafRanks)(sf, niranks, iranks, ioffset, irootloc));
  } else {
    /* No generic fallback: leaf-rank information only exists on types that build it */
    PetscSFType type;
    PetscCall(PetscSFGetType(sf, &type));
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
  }
  PetscFunctionReturn(0);
}

/* Linear membership test: PETSC_TRUE iff needle occurs in list[0..n-1] */
static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list) {
  PetscInt i;
  for (i = 0; i < n; i++) {
    if (needle == list[i]) return PETSC_TRUE;
  }
  return PETSC_FALSE;
}

/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.
   Collective

   Input Parameters:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: `PetscSFGetRootRanks()`
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup) {
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size, groupsize, *groupranks;
  PetscInt          *rcount, *ranks;
  PetscInt           i, irank = -1, orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
  /* Count how many leaves reference each remote rank (table maps rank -> leaf count) */
  PetscCall(PetscTableCreate(10, size, &table));
  for (i = 0; i < sf->nleaves; i++) {
    /* Log 1-based rank, since PetscTable cannot store the key 0 */
    PetscCall(PetscTableAdd(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
  }
  PetscCall(PetscTableGetCount(table, &sf->nranks));
  PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
  PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
  PetscCall(PetscTableGetHeadPosition(table, &pos));
  for (i = 0; i < sf->nranks; i++) {
    PetscCall(PetscTableGetNext(table, &pos, &ranks[i], &rcount[i]));
    ranks[i]--; /* Convert back to 0-based */
  }
  PetscCall(PetscTableDestroy(&table));

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group    group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;

    /* Translate the distinguished group's ranks into the SF communicator's rank space */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
    PetscCall(PetscMalloc1(groupsize, &dgroupranks));
    PetscCall(PetscMalloc1(groupsize, &groupranks));
    for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
    if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree(dgroupranks));
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished,
     using a two-pointer swap scheme (rcount[] is swapped alongside to stay paired with ranks[]) */
  for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
    for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i], groupsize, groupranks)) break;
    }
    for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank, tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  PetscCall(PetscFree(groupranks));
  /* Sort each partition separately so distinguished ranks stay in front */
  PetscCall(PetscSortIntWithArray(sf->ndranks, ranks, rcount));
  PetscCall(PetscSortIntWithArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
  sf->roffset[0] = 0;
  for (i = 0; i < sf->nranks; i++) {
    PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
    sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
    rcount[i]          = 0; /* reused below as the per-rank fill cursor */
  }
  for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
    /* short circuit: reuse irank while consecutive leaves reference the same remote rank */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks; try the distinguished block first */
      PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->ndranks, sf->ranks, &irank));
      if (irank < 0) {
        PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %" PetscInt_FMT " in array", sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  PetscCall(PetscFree2(rcount, ranks));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing) {
  MPI_Group group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) { /* groups are computed lazily and cached on the SF */
    PetscInt        i;
    const PetscInt *indegree;
    PetscMPIInt     rank, *outranks, *inranks;
    PetscSFNode    *remote;
    PetscSF         bgcount;

    /* Compute the number of incoming ranks: build a helper SF with one root and one leaf
       per referenced rank, then the root degree equals the number of incoming ranks */
    PetscCall(PetscMalloc1(sf->nranks, &remote));
    for (i = 0; i < sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
    PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
    PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
    PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
    /* Enumerate the incoming ranks */
    PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
    for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
    PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_incl(group, indegree[0], inranks, &sf->ingroup));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree2(inranks, outranks));
    PetscCall(PetscSFDestroy(&bgcount));
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* Outgoing group is simply the ranks this process references; cached like ingroup */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
    PetscCallMPI(MPI_Group_free(&group));
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Parameter:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Parameter:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:
   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.

.seealso: `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidPointer(multi, 2);
  if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this?
*/
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
    *multi           = sf->multi;
    sf->multi->multi = sf->multi;
    PetscFunctionReturn(0);
  }
  if (!sf->multi) { /* build and cache the multi-SF on first request */
    const PetscInt *indegree;
    PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
    PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
    PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
    /* inoffset[] = prefix sum of root degrees: where each root's split copies begin */
    PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
    inoffset[0] = 0;
    for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
    for (i = 0; i < maxlocal; i++) outones[i] = 1;
    /* Each leaf atomically fetches its root's running offset and increments it by 1,
       so outoffset[] assigns every leaf a distinct split-root index */
    PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
    PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
    for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
      for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
    }
    PetscCall(PetscMalloc1(sf->nleaves, &remote));
    for (i = 0; i < sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
    sf->multi->multi = sf->multi;
    PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
    if (sf->rankorder) { /* Sort the ranks */
      PetscMPIInt  rank;
      PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
      PetscSFNode *newremote;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
      for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
      PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
      for (i = 0; i < maxlocal; i++) outranks[i] = rank;
      PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
      PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i = 0; i < sf->nroots; i++) {
        PetscInt j;
        for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
        PetscCall(PetscSortIntWithArray(indegree[i], inranks + inoffset[i], tmpoffset));
        for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
      PetscCall(PetscMalloc1(sf->nleaves, &newremote));
      for (i = 0; i < sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      /* Replace the graph with the rank-sorted numbering */
      PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
      PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
    }
    PetscCall(PetscFree3(inoffset, outones, outoffset));
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices

   Collective

   Input Parameters:
+  sf - original star forest
.  nselected - number of selected roots on this process
-  selected - indices of the selected roots on this process

   Output Parameter:
.  esf - new star forest

   Level: advanced

   Note:
   To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
   be done by calling PetscSFGetGraph().

.seealso: `PetscSFSetGraph()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf) {
  PetscInt           i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
  const PetscInt    *ilocal;
  signed char       *rootdata, *leafdata, *leafmem;
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  MPI_Comm           comm;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  if (nselected) PetscValidIntPointer(selected, 3);
  PetscValidPointer(esf, 4);

  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));

  if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
    PetscBool dups;
PetscCall(PetscCheckDupsInt(nselected, selected, &dups)); 1250 PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups"); 1251 for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots); 1252 } 1253 1254 if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf); 1255 else { 1256 /* A generic version of creating embedded sf */ 1257 PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf)); 1258 maxlocal = maxleaf - minleaf + 1; 1259 PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem)); 1260 leafdata = leafmem - minleaf; 1261 /* Tag selected roots and bcast to leaves */ 1262 for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1; 1263 PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1264 PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1265 1266 /* Build esf with leaves that are still connected */ 1267 esf_nleaves = 0; 1268 for (i = 0; i < nleaves; i++) { 1269 j = ilocal ? ilocal[i] : i; 1270 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1271 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1272 */ 1273 esf_nleaves += (leafdata[j] ? 1 : 0); 1274 } 1275 PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal)); 1276 PetscCall(PetscMalloc1(esf_nleaves, &new_iremote)); 1277 for (i = n = 0; i < nleaves; i++) { 1278 j = ilocal ? 
ilocal[i] : i; 1279 if (leafdata[j]) { 1280 new_ilocal[n] = j; 1281 new_iremote[n].rank = iremote[i].rank; 1282 new_iremote[n].index = iremote[i].index; 1283 ++n; 1284 } 1285 } 1286 PetscCall(PetscSFCreate(comm, esf)); 1287 PetscCall(PetscSFSetFromOptions(*esf)); 1288 PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1289 PetscCall(PetscFree2(rootdata, leafmem)); 1290 } 1291 PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0)); 1292 PetscFunctionReturn(0); 1293 } 1294 1295 /*@C 1296 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1297 1298 Collective 1299 1300 Input Parameters: 1301 + sf - original star forest 1302 . nselected - number of selected leaves on this process 1303 - selected - indices of the selected leaves on this process 1304 1305 Output Parameter: 1306 . newsf - new star forest 1307 1308 Level: advanced 1309 1310 .seealso: `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1311 @*/ 1312 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf) { 1313 const PetscSFNode *iremote; 1314 PetscSFNode *new_iremote; 1315 const PetscInt *ilocal; 1316 PetscInt i, nroots, *leaves, *new_ilocal; 1317 MPI_Comm comm; 1318 1319 PetscFunctionBegin; 1320 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1321 PetscSFCheckGraphSet(sf, 1); 1322 if (nselected) PetscValidIntPointer(selected, 3); 1323 PetscValidPointer(newsf, 4); 1324 1325 /* Uniq selected[] and put results in leaves[] */ 1326 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1327 PetscCall(PetscMalloc1(nselected, &leaves)); 1328 PetscCall(PetscArraycpy(leaves, selected, nselected)); 1329 PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves)); 1330 PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT 
"/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves); 1331 1332 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1333 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf); 1334 else { 1335 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote)); 1336 PetscCall(PetscMalloc1(nselected, &new_ilocal)); 1337 PetscCall(PetscMalloc1(nselected, &new_iremote)); 1338 for (i = 0; i < nselected; ++i) { 1339 const PetscInt l = leaves[i]; 1340 new_ilocal[i] = ilocal ? ilocal[l] : l; 1341 new_iremote[i].rank = iremote[l].rank; 1342 new_iremote[i].index = iremote[l].index; 1343 } 1344 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf)); 1345 PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1346 } 1347 PetscCall(PetscFree(leaves)); 1348 PetscFunctionReturn(0); 1349 } 1350 1351 /*@C 1352 PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd() 1353 1354 Collective on PetscSF 1355 1356 Input Parameters: 1357 + sf - star forest on which to communicate 1358 . unit - data type associated with each node 1359 . rootdata - buffer to broadcast 1360 - op - operation to use for reduction 1361 1362 Output Parameter: 1363 . leafdata - buffer to be reduced with values from each leaf's respective root 1364 1365 Level: intermediate 1366 1367 Notes: 1368 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1369 are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1370 use PetscSFBcastWithMemTypeBegin() instead. 
.seealso: `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
@*/
PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) {
  PetscMemType rootmtype, leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  /* Skip event logging when this SF is driven by a VecScatter, which logs its own events */
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
  /* Detect host vs device memory for the buffers; see PetscSFBcastWithMemTypeBegin() to avoid this cost */
  PetscCall(PetscGetMemType(rootdata, &rootmtype));
  PetscCall(PetscGetMemType(leafdata, &leafmtype));
  PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd()

   Collective on PetscSF

   Input Parameters:
+  sf - star forest on which to communicate
.  unit - data type associated with each node
.  rootmtype - memory type of rootdata
.  rootdata - buffer to broadcast
.  leafmtype - memory type of leafdata
-  op - operation to use for reduction

   Output Parameter:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
@*/
PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
  /* Caller-supplied memory types; no PetscGetMemType() lookups here */
  PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Parameter:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: `PetscSFSetGraph()`, `PetscSFReduceEnd()`
@*/
PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
  PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()

   Collective

   Input Parameters:
+  sf - star forest
.
unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Parameter:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

   Notes:
   When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
   are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should
   use PetscSFReduceWithMemTypeBegin() instead.

.seealso: `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`
@*/
PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) {
  PetscMemType rootmtype, leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  /* Skip event logging when this SF is driven by a VecScatter, which logs its own events */
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
  /* Detect host vs device memory; see PetscSFReduceWithMemTypeBegin() to avoid this cost */
  PetscCall(PetscGetMemType(rootdata, &rootmtype));
  PetscCall(PetscGetMemType(leafdata, &leafmtype));
  PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafmtype - memory type of leafdata
.  leafdata - values to reduce
.  rootmtype - memory type of rootdata
-  op - reduction operation

   Output Parameter:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: `PetscSFBcastBegin()`, `PetscSFReduceBegin()`
@*/
PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
  /* Caller-supplied memory types; no PetscGetMemType() lookups here */
  PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Parameter:
.
rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: `PetscSFSetGraph()`, `PetscSFBcastEnd()`
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* Skip event logging when this SF is driven by a VecScatter, which logs its own events */
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0));
  PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
   integers.

.seealso: `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) {
  PetscMemType rootmtype, leafmtype, leafupdatemtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  /* Detect host vs device memory; see PetscSFFetchAndOpWithMemTypeBegin() to avoid this cost */
  PetscCall(PetscGetMemType(rootdata, &rootmtype));
  PetscCall(PetscGetMemType(leafdata, &leafmtype));
  PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
  PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
  PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  rootmtype - memory type of rootdata
.  leafmtype - memory type of leafdata
.  leafdata - leaf values to use in reduction
.  leafupdatemtype - memory type of leafupdate
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note: See PetscSFFetchAndOpBegin() for more details.

.seealso: `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
  PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
  PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1657 1658 .seealso: `PetscSFGatherBegin()` 1659 @*/ 1660 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt **degree) { 1661 PetscFunctionBegin; 1662 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1663 PetscSFCheckGraphSet(sf, 1); 1664 PetscValidPointer(degree, 2); 1665 if (!sf->degreeknown) { 1666 PetscInt i, nroots = sf->nroots, maxlocal; 1667 PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1668 maxlocal = sf->maxleaf - sf->minleaf + 1; 1669 PetscCall(PetscMalloc1(nroots, &sf->degree)); 1670 PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1671 for (i = 0; i < nroots; i++) sf->degree[i] = 0; 1672 for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1; 1673 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 1674 } 1675 *degree = NULL; 1676 PetscFunctionReturn(0); 1677 } 1678 1679 /*@C 1680 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin() 1681 1682 Collective 1683 1684 Input Parameter: 1685 . sf - star forest 1686 1687 Output Parameter: 1688 . degree - degree of each root vertex 1689 1690 Level: developer 1691 1692 Notes: 1693 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 
1694 1695 .seealso: 1696 @*/ 1697 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree) { 1698 PetscFunctionBegin; 1699 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1700 PetscSFCheckGraphSet(sf, 1); 1701 PetscValidPointer(degree, 2); 1702 if (!sf->degreeknown) { 1703 PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 1704 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 1705 PetscCall(PetscFree(sf->degreetmp)); 1706 sf->degreeknown = PETSC_TRUE; 1707 } 1708 *degree = sf->degree; 1709 PetscFunctionReturn(0); 1710 } 1711 1712 /*@C 1713 PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()). 1714 Each multi-root is assigned index of the corresponding original root. 1715 1716 Collective 1717 1718 Input Parameters: 1719 + sf - star forest 1720 - degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd() 1721 1722 Output Parameters: 1723 + nMultiRoots - (optional) number of multi-roots (roots of multi-SF) 1724 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots 1725 1726 Level: developer 1727 1728 Notes: 1729 The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed. 
1730 1731 .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()` 1732 @*/ 1733 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) { 1734 PetscSF msf; 1735 PetscInt i, j, k, nroots, nmroots; 1736 1737 PetscFunctionBegin; 1738 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1739 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL)); 1740 if (nroots) PetscValidIntPointer(degree, 2); 1741 if (nMultiRoots) PetscValidIntPointer(nMultiRoots, 3); 1742 PetscValidPointer(multiRootsOrigNumbering, 4); 1743 PetscCall(PetscSFGetMultiSF(sf, &msf)); 1744 PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL)); 1745 PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering)); 1746 for (i = 0, j = 0, k = 0; i < nroots; i++) { 1747 if (!degree[i]) continue; 1748 for (j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i; 1749 } 1750 PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail"); 1751 if (nMultiRoots) *nMultiRoots = nmroots; 1752 PetscFunctionReturn(0); 1753 } 1754 1755 /*@C 1756 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 1757 1758 Collective 1759 1760 Input Parameters: 1761 + sf - star forest 1762 . unit - data type 1763 - leafdata - leaf data to gather to roots 1764 1765 Output Parameter: 1766 . 
multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1767 1768 Level: intermediate 1769 1770 .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()` 1771 @*/ 1772 PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) { 1773 PetscSF multi = NULL; 1774 1775 PetscFunctionBegin; 1776 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1777 PetscCall(PetscSFSetUp(sf)); 1778 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1779 PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 1780 PetscFunctionReturn(0); 1781 } 1782 1783 /*@C 1784 PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin() 1785 1786 Collective 1787 1788 Input Parameters: 1789 + sf - star forest 1790 . unit - data type 1791 - leafdata - leaf data to gather to roots 1792 1793 Output Parameter: 1794 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1795 1796 Level: intermediate 1797 1798 .seealso: `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()` 1799 @*/ 1800 PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) { 1801 PetscSF multi = NULL; 1802 1803 PetscFunctionBegin; 1804 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1805 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1806 PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 1807 PetscFunctionReturn(0); 1808 } 1809 1810 /*@C 1811 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd() 1812 1813 Collective 1814 1815 Input Parameters: 1816 + sf - star forest 1817 . unit - data type 1818 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1819 1820 Output Parameter: 1821 . 
leafdata - leaf data to be update with personal data from each respective root 1822 1823 Level: intermediate 1824 1825 .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()` 1826 @*/ 1827 PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) { 1828 PetscSF multi = NULL; 1829 1830 PetscFunctionBegin; 1831 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1832 PetscCall(PetscSFSetUp(sf)); 1833 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1834 PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE)); 1835 PetscFunctionReturn(0); 1836 } 1837 1838 /*@C 1839 PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin() 1840 1841 Collective 1842 1843 Input Parameters: 1844 + sf - star forest 1845 . unit - data type 1846 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1847 1848 Output Parameter: 1849 . leafdata - leaf data to be update with personal data from each respective root 1850 1851 Level: intermediate 1852 1853 .seealso: `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()` 1854 @*/ 1855 PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) { 1856 PetscSF multi = NULL; 1857 1858 PetscFunctionBegin; 1859 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1860 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1861 PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE)); 1862 PetscFunctionReturn(0); 1863 } 1864 1865 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) { 1866 PetscInt i, n, nleaves; 1867 const PetscInt *ilocal = NULL; 1868 PetscHSetI seen; 1869 1870 PetscFunctionBegin; 1871 if (PetscDefined(USE_DEBUG)) { 1872 PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL)); 1873 PetscCall(PetscHSetICreate(&seen)); 1874 for (i = 0; i < nleaves; i++) { 1875 const PetscInt leaf = ilocal ? 
ilocal[i] : i; 1876 PetscCall(PetscHSetIAdd(seen, leaf)); 1877 } 1878 PetscCall(PetscHSetIGetSize(seen, &n)); 1879 PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique"); 1880 PetscCall(PetscHSetIDestroy(&seen)); 1881 } 1882 PetscFunctionReturn(0); 1883 } 1884 1885 /*@ 1886 PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view 1887 1888 Input Parameters: 1889 + sfA - The first PetscSF 1890 - sfB - The second PetscSF 1891 1892 Output Parameters: 1893 . sfBA - The composite SF 1894 1895 Level: developer 1896 1897 Notes: 1898 Currently, the two SFs must be defined on congruent communicators and they must be true star 1899 forests, i.e. the same leaf is not connected with different roots. 1900 1901 sfA's leaf space and sfB's root space might be partially overlapped. The composition builds 1902 a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected 1903 nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a 1904 Bcast on sfA, then a Bcast on sfB, on connected nodes. 
1905 1906 .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()` 1907 @*/ 1908 PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA) { 1909 const PetscSFNode *remotePointsA, *remotePointsB; 1910 PetscSFNode *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB; 1911 const PetscInt *localPointsA, *localPointsB; 1912 PetscInt *localPointsBA; 1913 PetscInt i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA; 1914 PetscBool denseB; 1915 1916 PetscFunctionBegin; 1917 PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1); 1918 PetscSFCheckGraphSet(sfA, 1); 1919 PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2); 1920 PetscSFCheckGraphSet(sfB, 2); 1921 PetscCheckSameComm(sfA, 1, sfB, 2); 1922 PetscValidPointer(sfBA, 3); 1923 PetscCall(PetscSFCheckLeavesUnique_Private(sfA)); 1924 PetscCall(PetscSFCheckLeavesUnique_Private(sfB)); 1925 1926 PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA)); 1927 PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB)); 1928 if (localPointsA) { 1929 PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA)); 1930 for (i = 0; i < numRootsB; i++) { 1931 reorderedRemotePointsA[i].rank = -1; 1932 reorderedRemotePointsA[i].index = -1; 1933 } 1934 for (i = 0; i < numLeavesA; i++) { 1935 if (localPointsA[i] >= numRootsB) continue; 1936 reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i]; 1937 } 1938 remotePointsA = reorderedRemotePointsA; 1939 } 1940 PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf)); 1941 PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB)); 1942 PetscCall(PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE)); 1943 PetscCall(PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE)); 1944 PetscCall(PetscFree(reorderedRemotePointsA)); 1945 1946 denseB = (PetscBool)!localPointsB; 1947 for (i = 0, numLeavesBA = 
0; i < numLeavesB; i++) { 1948 if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE; 1949 else numLeavesBA++; 1950 } 1951 if (denseB) { 1952 localPointsBA = NULL; 1953 remotePointsBA = leafdataB; 1954 } else { 1955 PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA)); 1956 PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA)); 1957 for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) { 1958 const PetscInt l = localPointsB ? localPointsB[i] : i; 1959 1960 if (leafdataB[l - minleaf].rank == -1) continue; 1961 remotePointsBA[numLeavesBA] = leafdataB[l - minleaf]; 1962 localPointsBA[numLeavesBA] = l; 1963 numLeavesBA++; 1964 } 1965 PetscCall(PetscFree(leafdataB)); 1966 } 1967 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA)); 1968 PetscCall(PetscSFSetFromOptions(*sfBA)); 1969 PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER)); 1970 PetscFunctionReturn(0); 1971 } 1972 1973 /*@ 1974 PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one 1975 1976 Input Parameters: 1977 + sfA - The first PetscSF 1978 - sfB - The second PetscSF 1979 1980 Output Parameters: 1981 . sfBA - The composite SF. 1982 1983 Level: developer 1984 1985 Notes: 1986 Currently, the two SFs must be defined on congruent communicators and they must be true star 1987 forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the 1988 second SF must have a degree of 1, i.e., no roots have more than one leaf connected. 1989 1990 sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds 1991 a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected 1992 roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then 1993 a Reduce on sfB, on connected roots. 
1994 1995 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()` 1996 @*/ 1997 PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA) { 1998 const PetscSFNode *remotePointsA, *remotePointsB; 1999 PetscSFNode *remotePointsBA; 2000 const PetscInt *localPointsA, *localPointsB; 2001 PetscSFNode *reorderedRemotePointsA = NULL; 2002 PetscInt i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA; 2003 MPI_Op op; 2004 #if defined(PETSC_USE_64BIT_INDICES) 2005 PetscBool iswin; 2006 #endif 2007 2008 PetscFunctionBegin; 2009 PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1); 2010 PetscSFCheckGraphSet(sfA, 1); 2011 PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2); 2012 PetscSFCheckGraphSet(sfB, 2); 2013 PetscCheckSameComm(sfA, 1, sfB, 2); 2014 PetscValidPointer(sfBA, 3); 2015 PetscCall(PetscSFCheckLeavesUnique_Private(sfA)); 2016 PetscCall(PetscSFCheckLeavesUnique_Private(sfB)); 2017 2018 PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA)); 2019 PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB)); 2020 2021 /* TODO: Check roots of sfB have degree of 1 */ 2022 /* Once we implement it, we can replace the MPI_MAXLOC 2023 with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect. 2024 We use MPI_MAXLOC only to have a deterministic output from this routine if 2025 the root condition is not meet. 
2026 */ 2027 op = MPI_MAXLOC; 2028 #if defined(PETSC_USE_64BIT_INDICES) 2029 /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */ 2030 PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin)); 2031 if (iswin) op = MPI_REPLACE; 2032 #endif 2033 2034 PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf)); 2035 PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA)); 2036 for (i = 0; i < maxleaf - minleaf + 1; i++) { 2037 reorderedRemotePointsA[i].rank = -1; 2038 reorderedRemotePointsA[i].index = -1; 2039 } 2040 if (localPointsA) { 2041 for (i = 0; i < numLeavesA; i++) { 2042 if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; 2043 reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i]; 2044 } 2045 } else { 2046 for (i = 0; i < numLeavesA; i++) { 2047 if (i > maxleaf || i < minleaf) continue; 2048 reorderedRemotePointsA[i - minleaf] = remotePointsA[i]; 2049 } 2050 } 2051 2052 PetscCall(PetscMalloc1(numRootsB, &localPointsBA)); 2053 PetscCall(PetscMalloc1(numRootsB, &remotePointsBA)); 2054 for (i = 0; i < numRootsB; i++) { 2055 remotePointsBA[i].rank = -1; 2056 remotePointsBA[i].index = -1; 2057 } 2058 2059 PetscCall(PetscSFReduceBegin(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op)); 2060 PetscCall(PetscSFReduceEnd(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op)); 2061 PetscCall(PetscFree(reorderedRemotePointsA)); 2062 for (i = 0, numLeavesBA = 0; i < numRootsB; i++) { 2063 if (remotePointsBA[i].rank == -1) continue; 2064 remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank; 2065 remotePointsBA[numLeavesBA].index = remotePointsBA[i].index; 2066 localPointsBA[numLeavesBA] = i; 2067 numLeavesBA++; 2068 } 2069 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA)); 2070 PetscCall(PetscSFSetFromOptions(*sfBA)); 2071 PetscCall(PetscSFSetGraph(*sfBA, numRootsA, 
numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER)); 2072 PetscFunctionReturn(0); 2073 } 2074 2075 /* 2076 PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF 2077 2078 Input Parameters: 2079 . sf - The global PetscSF 2080 2081 Output Parameters: 2082 . out - The local PetscSF 2083 */ 2084 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out) { 2085 MPI_Comm comm; 2086 PetscMPIInt myrank; 2087 const PetscInt *ilocal; 2088 const PetscSFNode *iremote; 2089 PetscInt i, j, nroots, nleaves, lnleaves, *lilocal; 2090 PetscSFNode *liremote; 2091 PetscSF lsf; 2092 2093 PetscFunctionBegin; 2094 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2095 if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out); 2096 else { 2097 /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */ 2098 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 2099 PetscCallMPI(MPI_Comm_rank(comm, &myrank)); 2100 2101 /* Find out local edges and build a local SF */ 2102 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 2103 for (i = lnleaves = 0; i < nleaves; i++) { 2104 if (iremote[i].rank == (PetscInt)myrank) lnleaves++; 2105 } 2106 PetscCall(PetscMalloc1(lnleaves, &lilocal)); 2107 PetscCall(PetscMalloc1(lnleaves, &liremote)); 2108 2109 for (i = j = 0; i < nleaves; i++) { 2110 if (iremote[i].rank == (PetscInt)myrank) { 2111 lilocal[j] = ilocal ? 
ilocal[i] : i; /* ilocal=NULL for contiguous storage */ 2112 liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */ 2113 liremote[j].index = iremote[i].index; 2114 j++; 2115 } 2116 } 2117 PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf)); 2118 PetscCall(PetscSFSetFromOptions(lsf)); 2119 PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER)); 2120 PetscCall(PetscSFSetUp(lsf)); 2121 *out = lsf; 2122 } 2123 PetscFunctionReturn(0); 2124 } 2125 2126 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */ 2127 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata) { 2128 PetscMemType rootmtype, leafmtype; 2129 2130 PetscFunctionBegin; 2131 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2132 PetscCall(PetscSFSetUp(sf)); 2133 PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 2134 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 2135 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 2136 PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata); 2137 PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 2138 PetscFunctionReturn(0); 2139 } 2140 2141 /*@ 2142 PetscSFConcatenate - concatenate multiple SFs into one 2143 2144 Input Parameters: 2145 + comm - the communicator 2146 . nsfs - the number of input PetscSF 2147 . sfs - the array of input PetscSF 2148 . shareRoots - the flag whether roots of input PetscSFs are taken as shared (PETSC_TRUE), or separate and concatenated (PETSC_FALSE) 2149 - leafOffsets - the array of local leaf offsets, one for each input PetscSF, or NULL for contiguous storage 2150 2151 Output Parameters: 2152 . newsf - The resulting PetscSF 2153 2154 Level: developer 2155 2156 Notes: 2157 The communicator of all SFs in sfs must be comm. 2158 2159 The offsets in leafOffsets are added to the original leaf indices. 
2160 2161 If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well. 2162 In this case, NULL is also passed as ilocal to the resulting SF. 2163 2164 If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs. 2165 In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs). 2166 2167 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()` 2168 @*/ 2169 PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscBool shareRoots, PetscInt leafOffsets[], PetscSF *newsf) { 2170 PetscInt i, s, nLeaves, nRoots; 2171 PetscInt *leafArrayOffsets; 2172 PetscInt *ilocal_new; 2173 PetscSFNode *iremote_new; 2174 PetscInt *rootOffsets; 2175 PetscBool all_ilocal_null = PETSC_FALSE; 2176 PetscMPIInt rank; 2177 2178 PetscFunctionBegin; 2179 { 2180 PetscSF dummy; /* just to have a PetscObject on comm for input validation */ 2181 2182 PetscCall(PetscSFCreate(comm, &dummy)); 2183 PetscValidLogicalCollectiveInt(dummy, nsfs, 2); 2184 PetscValidPointer(sfs, 3); 2185 for (i = 0; i < nsfs; i++) { 2186 PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3); 2187 PetscCheckSameComm(dummy, 1, sfs[i], 3); 2188 } 2189 PetscValidLogicalCollectiveBool(dummy, shareRoots, 4); 2190 if (leafOffsets) PetscValidIntPointer(leafOffsets, 5); 2191 PetscValidPointer(newsf, 6); 2192 PetscCall(PetscSFDestroy(&dummy)); 2193 } 2194 if (!nsfs) { 2195 PetscCall(PetscSFCreate(comm, newsf)); 2196 PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 2197 PetscFunctionReturn(0); 2198 } 2199 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2200 2201 PetscCall(PetscCalloc1(nsfs + 1, &rootOffsets)); 2202 if (shareRoots) { 2203 PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL)); 2204 if (PetscDefined(USE_DEBUG)) { 2205 for (s = 1; s < nsfs; s++) { 2206 PetscInt nr; 2207 
2208 PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL)); 2209 PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "shareRoots = PETSC_TRUE but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", s, nr, nRoots); 2210 } 2211 } 2212 } else { 2213 for (s = 0; s < nsfs; s++) { 2214 PetscInt nr; 2215 2216 PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL)); 2217 rootOffsets[s + 1] = rootOffsets[s] + nr; 2218 } 2219 nRoots = rootOffsets[nsfs]; 2220 } 2221 2222 /* Calculate leaf array offsets and automatic root offsets */ 2223 PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets)); 2224 leafArrayOffsets[0] = 0; 2225 for (s = 0; s < nsfs; s++) { 2226 PetscInt nl; 2227 2228 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL)); 2229 leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl; 2230 } 2231 nLeaves = leafArrayOffsets[nsfs]; 2232 2233 if (!leafOffsets) { 2234 all_ilocal_null = PETSC_TRUE; 2235 for (s = 0; s < nsfs; s++) { 2236 const PetscInt *ilocal; 2237 2238 PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL)); 2239 if (ilocal) { 2240 all_ilocal_null = PETSC_FALSE; 2241 break; 2242 } 2243 } 2244 PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL"); 2245 } 2246 2247 /* Renumber and concatenate local leaves */ 2248 ilocal_new = NULL; 2249 if (!all_ilocal_null) { 2250 PetscCall(PetscMalloc1(nLeaves, &ilocal_new)); 2251 for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1; 2252 for (s = 0; s < nsfs; s++) { 2253 const PetscInt *ilocal; 2254 PetscInt *ilocal_l = &ilocal_new[leafArrayOffsets[s]]; 2255 PetscInt i, nleaves_l; 2256 2257 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL)); 2258 for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? 
ilocal[i] : i) + leafOffsets[s]; 2259 } 2260 } 2261 2262 /* Renumber and concatenate remote roots */ 2263 PetscCall(PetscMalloc1(nLeaves, &iremote_new)); 2264 for (i = 0; i < nLeaves; i++) { 2265 iremote_new[i].rank = -1; 2266 iremote_new[i].index = -1; 2267 } 2268 for (s = 0; s < nsfs; s++) { 2269 PetscInt i, nl, nr; 2270 PetscSF tmp_sf; 2271 const PetscSFNode *iremote; 2272 PetscSFNode *tmp_rootdata; 2273 PetscSFNode *tmp_leafdata = &iremote_new[leafArrayOffsets[s]]; 2274 2275 PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote)); 2276 PetscCall(PetscSFCreate(comm, &tmp_sf)); 2277 /* create helper SF with contiguous leaves */ 2278 PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 2279 PetscCall(PetscSFSetUp(tmp_sf)); 2280 PetscCall(PetscMalloc1(nr, &tmp_rootdata)); 2281 for (i = 0; i < nr; i++) { 2282 tmp_rootdata[i].index = i + rootOffsets[s]; 2283 tmp_rootdata[i].rank = (PetscInt)rank; 2284 } 2285 PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2286 PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2287 PetscCall(PetscSFDestroy(&tmp_sf)); 2288 PetscCall(PetscFree(tmp_rootdata)); 2289 } 2290 2291 /* Build the new SF */ 2292 PetscCall(PetscSFCreate(comm, newsf)); 2293 PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER)); 2294 PetscCall(PetscSFSetUp(*newsf)); 2295 PetscCall(PetscFree(rootOffsets)); 2296 PetscCall(PetscFree(leafArrayOffsets)); 2297 PetscFunctionReturn(0); 2298 } 2299