1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/hashseti.h> 3 #include <petsc/private/viewerimpl.h> 4 #include <petscctable.h> 5 6 #if defined(PETSC_HAVE_CUDA) 7 #include <cuda_runtime.h> 8 #endif 9 10 #if defined(PETSC_HAVE_HIP) 11 #include <hip/hip_runtime.h> 12 #endif 13 14 #if defined(PETSC_CLANG_STATIC_ANALYZER) 15 void PetscSFCheckGraphSet(PetscSF, int); 16 #else 17 #if defined(PETSC_USE_DEBUG) 18 #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME); 19 #else 20 #define PetscSFCheckGraphSet(sf, arg) \ 21 do { \ 22 } while (0) 23 #endif 24 #endif 25 26 const char *const PetscSFDuplicateOptions[] = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL}; 27 const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL}; 28 29 /*@ 30 PetscSFCreate - create a star forest communication context 31 32 Collective 33 34 Input Parameter: 35 . comm - communicator on which the star forest will operate 36 37 Output Parameter: 38 . sf - new star forest context 39 40 Options Database Keys: 41 . -sf_type type - value of type may be 42 .vb 43 basic -Use MPI persistent Isend/Irecv for communication (Default) 44 window -Use MPI-3 one-sided window for communication 45 neighbor -Use MPI-3 neighborhood collectives for communication 46 .ve 47 48 Level: intermediate 49 50 Note: 51 When one knows the communication graph is one of the predefined graph, such as `MPI_Alltoall()`, `MPI_Allgatherv()`, 52 `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special 53 `SF`s are optimized and they have better performance than general `SF`s. 

.seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
{
  PetscSF b;

  PetscFunctionBegin;
  PetscValidPointer(sf, 2);
  PetscCall(PetscSFInitializePackage());

  PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));

  /* "Graph not set yet" sentinels; PetscSFSetGraph()/PetscSFSetUp() fill these in later */
  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_MAX_INT;
  b->maxleaf   = PETSC_MIN_INT;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;
#if defined(PETSC_HAVE_DEVICE)
  b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
  b->use_stream_aware_mpi = PETSC_FALSE;
  b->unknown_input_stream = PETSC_FALSE;
  #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
  b->backend = PETSCSF_BACKEND_KOKKOS;
  #elif defined(PETSC_HAVE_CUDA)
  b->backend = PETSCSF_BACKEND_CUDA;
  #elif defined(PETSC_HAVE_HIP)
  b->backend = PETSCSF_BACKEND_HIP;
  #endif

  #if defined(PETSC_HAVE_NVSHMEM)
  b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
  b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
  #endif
#endif
  /* VecScatter bookkeeping; sizes are filled in when the SF is used as a scatter */
  b->vscat.from_n = -1;
  b->vscat.to_n   = -1;
  b->vscat.unit   = MPIU_SCALAR;
  *sf             = b;
  PetscFunctionReturn(0);
}

/*@
  PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

  Collective

  Input Parameter:
.
sf - star forest

  Level: advanced

.seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* Give the implementation a chance to release its own data first */
  PetscTryTypeMethod(sf, Reset);
  /* Return the graph description to the "not set" state used by PetscSFCreate() */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  PetscCall(PetscFree(sf->mine_alloc));
  PetscCall(PetscFree(sf->remote_alloc));
  sf->nranks = -1;
  PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
  sf->degreeknown = PETSC_FALSE;
  PetscCall(PetscFree(sf->degree));
  if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
  if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
  /* Break the multi-SF's back-reference to us before destroying it */
  if (sf->multi) sf->multi->multi = NULL;
  PetscCall(PetscSFDestroy(&sf->multi));
  PetscCall(PetscLayoutDestroy(&sf->map));

#if defined(PETSC_HAVE_DEVICE)
  for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
#endif

  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@C
  PetscSFSetType - Set the `PetscSF` communication implementation

  Collective on sf

  Input Parameters:
+ sf - the `PetscSF` context
- type - a known method
.vb
    PETSCSFWINDOW - MPI-2/3 one-sided
    PETSCSFBASIC - basic implementation using MPI-1 two-sided
.ve

  Options Database Key:
.
-sf_type <type> - Sets the method; for example basic or window use -help for a list of available methods (for instance, window, basic, neighbor)

  Level: intermediate

  Notes:
  See "include/petscsf.h" for available methods (for instance)

.seealso: `PetscSFType`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
{
  PetscBool match;
  PetscErrorCode (*r)(PetscSF);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidCharPointer(type, 2);

  /* Nothing to do if the SF already has the requested type */
  PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
  if (match) PetscFunctionReturn(0);

  PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
  PetscCheck(r, PETSC_COMM_SELF, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
  /* Destroy the previous PetscSF implementation context */
  PetscTryTypeMethod(sf, Destroy);
  /* Wipe the ops table so stale methods of the old type cannot leak into the new one */
  PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
  PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
  PetscCall((*r)(sf));
  PetscFunctionReturn(0);
}

/*@C
  PetscSFGetType - Get the `PetscSF` communication implementation

  Not Collective

  Input Parameter:
. sf - the `PetscSF` context

  Output Parameter:
. type - the `PetscSF` type name

  Level: intermediate

.seealso: `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidPointer(type, 2);
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}

/*@C
  PetscSFDestroy - destroy star forest

  Collective

  Input Parameter:
.
sf - address of star forest

  Level: intermediate

.seealso: `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf), PETSCSF_CLASSID, 1);
  /* Only the last reference actually destroys the object */
  if (--((PetscObject)(*sf))->refct > 0) {
    *sf = NULL;
    PetscFunctionReturn(0);
  }
  PetscCall(PetscSFReset(*sf));
  PetscTryTypeMethod((*sf), Destroy);
  PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
  /* bs > 1 means vscat.unit is a derived datatype owned by this SF and must be freed */
  if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
  PetscCall(PetscHeaderDestroy(sf));
  PetscFunctionReturn(0);
}

/* Validate ranks and indices of the graph; only active in debug builds with a set graph */
static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
{
  PetscInt           i, nleaves;
  PetscMPIInt        size;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;

  PetscFunctionBegin;
  if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
  PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
  for (i = 0; i < nleaves; i++) {
    const PetscInt rank   = iremote[i].rank;
    const PetscInt remote = iremote[i].index;
    const PetscInt leaf   = ilocal ? ilocal[i] : i; /* NULL ilocal means contiguous leaves */
    PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
    PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
    PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
  }
  PetscFunctionReturn(0);
}

/*@
  PetscSFSetUp - set up communication structures

  Collective

  Input Parameter:
.
sf - star forest communication object 273 274 Level: beginner 275 276 .seealso: `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()` 277 @*/ 278 PetscErrorCode PetscSFSetUp(PetscSF sf) 279 { 280 PetscFunctionBegin; 281 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 282 PetscSFCheckGraphSet(sf, 1); 283 if (sf->setupcalled) PetscFunctionReturn(0); 284 PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0)); 285 PetscCall(PetscSFCheckGraphValid_Private(sf)); 286 if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */ 287 PetscTryTypeMethod(sf, SetUp); 288 #if defined(PETSC_HAVE_CUDA) 289 if (sf->backend == PETSCSF_BACKEND_CUDA) { 290 sf->ops->Malloc = PetscSFMalloc_CUDA; 291 sf->ops->Free = PetscSFFree_CUDA; 292 } 293 #endif 294 #if defined(PETSC_HAVE_HIP) 295 if (sf->backend == PETSCSF_BACKEND_HIP) { 296 sf->ops->Malloc = PetscSFMalloc_HIP; 297 sf->ops->Free = PetscSFFree_HIP; 298 } 299 #endif 300 301 # 302 #if defined(PETSC_HAVE_KOKKOS) 303 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 304 sf->ops->Malloc = PetscSFMalloc_Kokkos; 305 sf->ops->Free = PetscSFFree_Kokkos; 306 } 307 #endif 308 PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0)); 309 sf->setupcalled = PETSC_TRUE; 310 PetscFunctionReturn(0); 311 } 312 313 /*@ 314 PetscSFSetFromOptions - set `PetscSF` options using the options database 315 316 Logically Collective 317 318 Input Parameter: 319 . sf - star forest 320 321 Options Database Keys: 322 + -sf_type - implementation type, see PetscSFSetType() 323 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 324 . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also 325 use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true). 326 If true, this option only works with -use_gpu_aware_mpi 1. 
. -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
                             If true, this option only works with -use_gpu_aware_mpi 1.

- -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos.
                                   On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
                                   the only available is kokkos.

  Level: intermediate

.seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType deft;
  char        type[256];
  PetscBool   flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscObjectOptionsBegin((PetscObject)sf);
  /* Default to the SF's current type when it has one, otherwise to the basic implementation */
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
  PetscCall(PetscSFSetType(sf, flg ? type : deft));
  PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
#if defined(PETSC_HAVE_DEVICE)
  {
    char      backendstr[32] = {0};
    PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
    /* Change the defaults set in PetscSFCreate() with command line options */
    PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
    PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
    PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
    /* Case-insensitive match of the requested backend name */
    PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
    PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
    PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
  #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
    if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
    else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
    else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
    else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
  #elif defined(PETSC_HAVE_KOKKOS)
    /* Without CUDA/HIP, kokkos is the only backend, so anything else is an error */
    PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
  #endif
  }
#endif
  PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
  PetscOptionsEnd();
  PetscFunctionReturn(0);
}

/*@
  PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

  Logically Collective

  Input Parameters:
+ sf - star forest
- flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic)

  Level: advanced

.seealso: `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveBool(sf, flg, 2);
  /* The ordering is baked into the multi-SF, so it cannot be changed once that exists */
  PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}

/*@C
  PetscSFSetGraph - Set a parallel star forest

  Collective

  Input Parameters:
+ sf - star forest
. nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
. nleaves - number of leaf vertices on the current process, each of these references a root on any process
. ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
           during setup in debug mode)
. localmode - copy mode for ilocal
. iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
            during setup in debug mode)
- remotemode - copy mode for iremote

  Level: intermediate

  Notes:
  Leaf indices in ilocal must be unique, otherwise an error occurs.

  Input arrays ilocal and iremote follow the PetscCopyMode semantics.
  In particular, if localmode/remotemode is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
  PETSc might modify the respective array;
  if `PETSC_USE_POINTER`, the user must delete the array after PetscSFDestroy().
  Only if `PETSC_COPY_VALUES` is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).

  Fortran Note:
  In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode.

  Developer Note:
  We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
  This also allows to compare leaf sets of two SFs easily.

.seealso: `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode)
{
  PetscBool unique, contiguous;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal, 4);
  if (nleaves > 0) PetscValidPointer(iremote, 6);
  PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
  PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
  /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
   * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
  PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
  PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    PetscCall(PetscSFReset(sf));
  }

  PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  /* With PETSC_COPY_VALUES, work on private copies so the caller's arrays stay intact
     (the sort below mutates ilocal and iremote) */
  if (localmode == PETSC_COPY_VALUES && ilocal) {
    PetscInt *tlocal = NULL;

    PetscCall(PetscMalloc1(nleaves, &tlocal));
    PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
    ilocal = tlocal;
  }
  if (remotemode == PETSC_COPY_VALUES) {
    PetscSFNode *tremote = NULL;

    PetscCall(PetscMalloc1(nleaves, &tremote));
    PetscCall(PetscArraycpy(tremote, iremote, nleaves));
    iremote = tremote;
  }

  if (nleaves && ilocal) {
    PetscSFNode work;

    /* Sort leaves (keeping iremote entries paired) so we can detect duplicates and
       contiguity, and read off minleaf/maxleaf from the ends */
    PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
    PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
    unique = PetscNot(unique);
    PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
    sf->minleaf = ilocal[0];
    sf->maxleaf = ilocal[nleaves - 1];
    contiguous  = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
  } else {
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
    unique      = PETSC_TRUE;
    contiguous  = PETSC_TRUE;
  }

  /* Contiguous leaves are canonically represented by a NULL ilocal, so drop the array */
  if (contiguous) {
    if (localmode == PETSC_USE_POINTER) {
      ilocal = NULL;
    } else {
      PetscCall(PetscFree(ilocal));
    }
  }
  sf->mine = ilocal;
  if (localmode == PETSC_USE_POINTER) {
    sf->mine_alloc = NULL; /* the SF does not own the caller's array */
  } else {
    sf->mine_alloc = ilocal;
  }
  sf->remote = iremote;
  if (remotemode == PETSC_USE_POINTER) {
    sf->remote_alloc = NULL; /* the SF does not own the caller's array */
  } else {
    sf->remote_alloc = iremote;
  }
  PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
  PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern

  Collective

  Input Parameters:
+ sf - The `PetscSF`
. map - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`)
- pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`

  Level: intermediate

  Notes:
  It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector x and its layout is map.
  n and N are local and global sizes of x respectively.

  With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph that if one does Bcast on it, it will copy x to
  sequential vectors y on all ranks.

  With `PETSCSF_PATTERN_GATHER`, the routine creates a graph that if one does Bcast on it, it will copy x to a
  sequential vector y on rank 0.

  In above cases, entries of x are roots and entries of y are leaves.

  With `PETSCSF_PATTERN_ALLTOALL`, map is insignificant. Suppose NP is size of sf's communicator. The routine
  creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
  of rank j. Here 0 <=i,j<NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that it does
  not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data
  items with `MPI_Type_contiguous` and use that as the <unit> argument in SF routines.

  In this case, roots and leaves are symmetric.

.seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
{
  MPI_Comm    comm;
  PetscInt    n, N, res[2];
  PetscMPIInt rank, size;
  PetscSFType type;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map, 2);
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* Build our own layout: "size" roots per rank, one per peer */
    PetscCall(PetscLayoutCreate(comm, &sf->map));
    PetscCall(PetscLayoutSetLocalSize(sf->map, size));
    PetscCall(PetscLayoutSetSize(sf->map, ((PetscInt)size) * size));
    PetscCall(PetscLayoutSetUp(sf->map));
  } else {
    PetscCall(PetscLayoutGetLocalSize(map, &n));
    PetscCall(PetscLayoutGetSize(map, &N));
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    PetscCall(PetscLayoutReference(map, &sf->map));
  }
  PetscCall(PetscSFSetType(sf, type));

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots  = n;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    sf->nleaves = rank ? 0 : N; /* only rank 0 has leaves in a gather */
    sf->nroots  = n;
    sf->nranks  = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves = size;
    sf->nroots  = size;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
  PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map

  Collective

  Input Parameter:
. sf - star forest to invert

  Output Parameter:
. isf - inverse of sf

  Level: advanced

  Notes:
  All roots must have degree 1.

  The local space may be a permutation, but cannot be sparse.

.seealso: `PetscSFType`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
{
  PetscMPIInt     rank;
  PetscInt        i, nroots, nleaves, maxlocal, count, *newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots, *leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscValidPointer(isf, 2);

  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
  maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
  PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
  /* Each leaf slot carries its own (rank, index) ... */
  for (i = 0; i < maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* ... and each root starts out marked "unreferenced" */
  for (i = 0; i < nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  /* After the reduce, every referenced root holds the (rank, leaf) that points at it,
     which is exactly the remote side of the inverse graph */
  PetscCall(PetscSFReduceBegin(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));

  /* Check whether our leaves are sparse */
  for (i = 0, count = 0; i < nroots; i++)
    if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL;
  else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    PetscCall(PetscMalloc1(count, &newilocal));
    for (i = 0, count = 0; i < nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
  PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
  PetscCall(PetscFree2(roots, leaves));
  PetscFunctionReturn(0);
}

/*@
  PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph

  Collective

  Input Parameters:
+ sf - communication object to duplicate
- opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)

  Output Parameter:
. newsf - new communication object

  Level: beginner

.seealso: `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
{
  PetscSFType  type;
  MPI_Datatype dtype = MPIU_SCALAR;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveEnum(sf, opt, 2);
  PetscValidPointer(newsf, 3);
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
  PetscCall(PetscSFGetType(sf, &type));
  if (type) PetscCall(PetscSFSetType(*newsf, type));
  (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag ealier since PetscSFSetGraph() below checks on this flag */
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf, 1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt           nroots, nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
      /* PETSC_COPY_VALUES keeps the source arrays intact, so the const casts are safe */
      PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
    } else {
      PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
    }
  }
  /* Since oldtype is committed, so is newtype, according to MPI */
  if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
  (*newsf)->vscat.bs     = sf->vscat.bs;
  (*newsf)->vscat.unit   = dtype;
  (*newsf)->vscat.to_n   = sf->vscat.to_n;
  (*newsf)->vscat.from_n = sf->vscat.from_n;
  /* Do not copy lsf. Build it on demand since it is rarely used */

#if defined(PETSC_HAVE_DEVICE)
  (*newsf)->backend              = sf->backend;
  (*newsf)->unknown_input_stream = sf->unknown_input_stream;
  (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(0);
}

/*@C
  PetscSFGetGraph - Get the graph specifying a parallel star forest

  Not Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
. nleaves - number of leaf vertices on the current process, each of these references a root on any process
. ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
- iremote - remote locations of root vertices for each leaf on the current process

  Level: intermediate

  Notes:
  We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet

  The returned ilocal/iremote might contain values in different order than the input ones in `PetscSFSetGraph()`,
  see its manpage for details.

  Fortran Notes:
  The returned iremote array is a copy and must be deallocated after use. Consequently, if you
  want to update the graph, you must call `PetscSFSetGraph()` after modifying the iremote array.

  To check for a NULL ilocal use
$ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then

.seealso: `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* An implementation may synthesize its own view of the graph (e.g. patterned SFs) */
  if (sf->ops->GetGraph) {
    PetscCall((sf->ops->GetGraph)(sf, nroots, nleaves, ilocal, iremote));
  } else {
    /* Generic path: hand back the stored description; each output is optional */
    if (nroots) *nroots = sf->nroots;
    if (nleaves) *nleaves = sf->nleaves;
    if (ilocal) *ilocal = sf->mine;
    if (iremote) *iremote = sf->remote;
  }
  PetscFunctionReturn(0);
}

/*@
  PetscSFGetLeafRange - Get the active leaf ranges

  Not Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
- maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.

  Level: developer

.seealso: `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}

/*@C
  PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database

  Collective on A

  Input Parameters:
+ A - the star forest
.
obj - Optional object that provides the prefix for the option names
- name - command line option

  Level: intermediate

.seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1);
  PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
  PetscFunctionReturn(0);
}

/*@C
  PetscSFView - view a star forest

  Collective

  Input Parameters:
+ sf - star forest
- viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`

  Level: beginner

.seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
{
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  PetscCheckSameComm(sf, 1, viewer, 2);
  if (sf->graphset) PetscCall(PetscSFSetUp(sf)); /* the rank/offset tables printed below are built in setup */
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt    ii, i, j;

    PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
    PetscCall(PetscViewerASCIIPushTab(viewer));
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
        PetscCall(PetscViewerASCIIPopTab(viewer));
        PetscFunctionReturn(0);
      }
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n", rank, sf->nroots, sf->nleaves, sf->nranks));
      for (i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerGetFormat(viewer, &format));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks, *perm;
        /* Print neighbors in rank order: sort a scratch copy and carry a permutation */
        PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
        PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
        for (i = 0; i < sf->nranks; i++) perm[i] = i;
        PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
        for (ii = 0; ii < sf->nranks; ii++) {
          i = perm[ii];
          PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
          for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
        }
        PetscCall(PetscFree2(tmpranks, perm));
      }
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
    }
    PetscCall(PetscViewerASCIIPopTab(viewer));
  }
  PetscTryTypeMethod(sf, View, viewer);
  PetscFunctionReturn(0);
}

/*@C
  PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

  Not Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ nranks - number of ranks referenced by local part
. ranks - array of ranks
. roffset - offset in rmine/rremote for each rank (length nranks+1)
. rmine - concatenated array holding local indices referencing each remote rank
- rremote - concatenated array holding remote indices referenced for each remote rank

  Level: developer

.seealso: `PetscSF`, `PetscSFGetLeafRanks()`
@*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* The rank tables are built during setup, so they are not available before it */
  PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetRootRanks) {
    PetscCall((sf->ops->GetRootRanks)(sf, nranks, ranks, roffset, rmine, rremote));
  } else {
    /* The generic implementation */
    if (nranks) *nranks = sf->nranks;
    if (ranks) *ranks = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine) *rmine = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  }
  PetscFunctionReturn(0);
}

/*@C
  PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

  Not Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ niranks - number of leaf ranks referencing roots on this process
. iranks - array of ranks
.
ioffset - offset in irootloc for each rank (length niranks+1) 955 - irootloc - concatenated array holding local indices of roots referenced by each leaf rank 956 957 Level: developer 958 959 .seealso: `PetscSF`, `PetscSFGetRootRanks()` 960 @*/ 961 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc) 962 { 963 PetscFunctionBegin; 964 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 965 PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 966 if (sf->ops->GetLeafRanks) { 967 PetscCall((sf->ops->GetLeafRanks)(sf, niranks, iranks, ioffset, irootloc)); 968 } else { 969 PetscSFType type; 970 PetscCall(PetscSFGetType(sf, &type)); 971 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 972 } 973 PetscFunctionReturn(0); 974 } 975 976 static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list) 977 { 978 PetscInt i; 979 for (i = 0; i < n; i++) { 980 if (needle == list[i]) return PETSC_TRUE; 981 } 982 return PETSC_FALSE; 983 } 984 985 /*@C 986 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations. 

  Collective

  Input Parameters:
+ sf - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
- dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)

  Level: developer

.seealso: `PetscSF`, `PetscSFGetRootRanks()`
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
{
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size, groupsize, *groupranks;
  PetscInt          *rcount, *ranks;
  PetscInt           i, irank = -1, orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
  /* Count leaves per remote rank in a hash table (keys must be nonzero, hence the 1-based shift) */
  PetscCall(PetscTableCreate(10, size, &table));
  for (i = 0; i < sf->nleaves; i++) {
    /* Log 1-based rank */
    PetscCall(PetscTableAdd(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
  }
  PetscCall(PetscTableGetCount(table, &sf->nranks));
  PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
  PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
  PetscCall(PetscTableGetHeadPosition(table, &pos));
  for (i = 0; i < sf->nranks; i++) {
    PetscCall(PetscTableGetNext(table, &pos, &ranks[i], &rcount[i]));
    ranks[i]--; /* Convert back to 0-based */
  }
  PetscCall(PetscTableDestroy(&table));

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group    group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate the distinguished group's ranks into this communicator's numbering */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
    PetscCall(PetscMalloc1(groupsize, &dgroupranks));
    PetscCall(PetscMalloc1(groupsize, &groupranks));
    for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
    if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree(dgroupranks));
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
    for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i], groupsize, groupranks)) break;
    }
    for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank, tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  PetscCall(PetscFree(groupranks));
  /* Sort each partition separately so that each is ordered by rank */
  PetscCall(PetscSortIntWithArray(sf->ndranks, ranks, rcount));
  PetscCall(PetscSortIntWithArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
  /* Prefix-sum the per-rank counts into roffset[]; rcount[] is recycled as a fill cursor below */
  sf->roffset[0] = 0;
  for (i = 0; i < sf->nranks; i++) {
    PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
    sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
    rcount[i]          = 0;
  }
  for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
    /* short circuit: reuse irank while consecutive leaves reference the same remote rank */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks (distinguished block first, then the rest) */
      PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->ndranks, sf->ranks, &irank));
      if (irank < 0) {
        PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %" PetscInt_FMT " in array", sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  PetscCall(PetscFree2(rcount, ranks));
  PetscFunctionReturn(0);
}

/*@C
  PetscSFGetGroups - gets incoming and outgoing process groups

  Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ incoming - group of origin processes for incoming edges (leaves that reference my roots)
- outgoing - group of destination processes for outgoing edges (roots that I reference)

  Level: developer

.seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
{
  MPI_Group group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) { /* groups are built lazily and cached on the SF */
    PetscInt        i;
    const PetscInt *indegree;
    PetscMPIInt     rank, *outranks, *inranks;
    PetscSFNode    *remote;
    PetscSF         bgcount;

    /* Compute the number of incoming ranks: an auxiliary SF with one root and one leaf per referenced rank */
    PetscCall(PetscMalloc1(sf->nranks, &remote));
    for (i = 0; i < sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
    PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
    PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
    PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
    /* Enumerate the incoming ranks: each referencing process gathers its own rank to us */
    PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
    for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
    PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
    PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_incl(group, indegree[0], inranks, &sf->ingroup));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree2(inranks, outranks));
    PetscCall(PetscSFDestroy(&bgcount));
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* Outgoing ranks are already known locally: they are exactly sf->ranks */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
    PetscCallMPI(MPI_Group_free(&group));
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}

/*@
  PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters

  Collective

  Input Parameter:
. sf - star forest that may contain roots with 0 or with more than 1 vertex

  Output Parameter:
. multi - star forest with split roots, such that each root has degree exactly 1

  Level: developer

  Note:
  In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
  directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
  edge, it is a candidate for future optimization that might involve its removal.

.seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidPointer(multi, 2);
  if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
    *multi           = sf->multi;
    sf->multi->multi = sf->multi; /* the multi-SF is its own multi-SF */
    PetscFunctionReturn(0);
  }
  if (!sf->multi) { /* build lazily and cache on the SF */
    const PetscInt *indegree;
    PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
    PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
    PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
    /* inoffset[] is the prefix sum of root degrees: the first multi-root slot of each root */
    PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
    inoffset[0] = 0;
    for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
    for (i = 0; i < maxlocal; i++) outones[i] = 1;
    /* Each leaf atomically claims a distinct slot at its root; outoffset[] receives the claimed slot */
    PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
    PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
    for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
      for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
    }
    PetscCall(PetscMalloc1(sf->nleaves, &remote));
    for (i = 0; i < sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
    sf->multi->multi = sf->multi;
    PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
    if (sf->rankorder) { /* Sort the ranks */
      PetscMPIInt  rank;
      PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
      PetscSFNode *newremote;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
      for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
      PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
      /* Deliver the owning rank of every leaf into the multi-root slots */
      for (i = 0; i < maxlocal; i++) outranks[i] = rank;
      PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
      PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i = 0; i < sf->nroots; i++) {
        PetscInt j;
        for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
        PetscCall(PetscSortIntWithArray(indegree[i], inranks + inoffset[i], tmpoffset));
        for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell each leaf its rank-sorted slot, then rebuild the graph with those slots */
      PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
      PetscCall(PetscMalloc1(sf->nleaves, &newremote));
      for (i = 0; i < sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
      PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
    }
    PetscCall(PetscFree3(inoffset, outones, outoffset));
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}

/*@C
  PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices

  Collective

  Input Parameters:
+ sf - original star forest
. nselected - number of selected roots on this process
- selected - indices of the selected roots on this process

  Output Parameter:
. esf - new star forest

  Level: advanced

  Note:
  To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
  be done by calling PetscSFGetGraph().

.seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf)
{
  PetscInt           i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
  const PetscInt    *ilocal;
  signed char       *rootdata, *leafdata, *leafmem;
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  MPI_Comm           comm;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  if (nselected) PetscValidIntPointer(selected, 3);
  PetscValidPointer(esf, 4);

  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));

  if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
    PetscBool
dups; 1282 PetscCall(PetscCheckDupsInt(nselected, selected, &dups)); 1283 PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups"); 1284 for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots); 1285 } 1286 1287 if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf); 1288 else { 1289 /* A generic version of creating embedded sf */ 1290 PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf)); 1291 maxlocal = maxleaf - minleaf + 1; 1292 PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem)); 1293 leafdata = leafmem - minleaf; 1294 /* Tag selected roots and bcast to leaves */ 1295 for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1; 1296 PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1297 PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1298 1299 /* Build esf with leaves that are still connected */ 1300 esf_nleaves = 0; 1301 for (i = 0; i < nleaves; i++) { 1302 j = ilocal ? ilocal[i] : i; 1303 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1304 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1305 */ 1306 esf_nleaves += (leafdata[j] ? 1 : 0); 1307 } 1308 PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal)); 1309 PetscCall(PetscMalloc1(esf_nleaves, &new_iremote)); 1310 for (i = n = 0; i < nleaves; i++) { 1311 j = ilocal ? 
ilocal[i] : i; 1312 if (leafdata[j]) { 1313 new_ilocal[n] = j; 1314 new_iremote[n].rank = iremote[i].rank; 1315 new_iremote[n].index = iremote[i].index; 1316 ++n; 1317 } 1318 } 1319 PetscCall(PetscSFCreate(comm, esf)); 1320 PetscCall(PetscSFSetFromOptions(*esf)); 1321 PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1322 PetscCall(PetscFree2(rootdata, leafmem)); 1323 } 1324 PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0)); 1325 PetscFunctionReturn(0); 1326 } 1327 1328 /*@C 1329 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1330 1331 Collective 1332 1333 Input Parameters: 1334 + sf - original star forest 1335 . nselected - number of selected leaves on this process 1336 - selected - indices of the selected leaves on this process 1337 1338 Output Parameter: 1339 . newsf - new star forest 1340 1341 Level: advanced 1342 1343 .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1344 @*/ 1345 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf) 1346 { 1347 const PetscSFNode *iremote; 1348 PetscSFNode *new_iremote; 1349 const PetscInt *ilocal; 1350 PetscInt i, nroots, *leaves, *new_ilocal; 1351 MPI_Comm comm; 1352 1353 PetscFunctionBegin; 1354 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1355 PetscSFCheckGraphSet(sf, 1); 1356 if (nselected) PetscValidIntPointer(selected, 3); 1357 PetscValidPointer(newsf, 4); 1358 1359 /* Uniq selected[] and put results in leaves[] */ 1360 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1361 PetscCall(PetscMalloc1(nselected, &leaves)); 1362 PetscCall(PetscArraycpy(leaves, selected, nselected)); 1363 PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves)); 1364 PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices 
%" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves); 1365 1366 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1367 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf); 1368 else { 1369 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote)); 1370 PetscCall(PetscMalloc1(nselected, &new_ilocal)); 1371 PetscCall(PetscMalloc1(nselected, &new_iremote)); 1372 for (i = 0; i < nselected; ++i) { 1373 const PetscInt l = leaves[i]; 1374 new_ilocal[i] = ilocal ? ilocal[l] : l; 1375 new_iremote[i].rank = iremote[l].rank; 1376 new_iremote[i].index = iremote[l].index; 1377 } 1378 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf)); 1379 PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1380 } 1381 PetscCall(PetscFree(leaves)); 1382 PetscFunctionReturn(0); 1383 } 1384 1385 /*@C 1386 PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()` 1387 1388 Collective on sf 1389 1390 Input Parameters: 1391 + sf - star forest on which to communicate 1392 . unit - data type associated with each node 1393 . rootdata - buffer to broadcast 1394 - op - operation to use for reduction 1395 1396 Output Parameter: 1397 . leafdata - buffer to be reduced with values from each leaf's respective root 1398 1399 Level: intermediate 1400 1401 Notes: 1402 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1403 are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1404 use `PetscSFBcastWithMemTypeBegin()` instead. 

.seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
@*/
PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
{
  PetscMemType rootmtype, leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  /* Skip event logging when used inside VecScatter, which logs its own events */
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
  /* Detect host vs device pointers; see the manual page note about the cost of this detection */
  PetscCall(PetscGetMemType(rootdata, &rootmtype));
  PetscCall(PetscGetMemType(leafdata, &leafmtype));
  PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
  PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to `PetscSFBcastEnd()`

  Collective on sf

  Input Parameters:
+ sf - star forest on which to communicate
. unit - data type associated with each node
. rootmtype - memory type of rootdata
. rootdata - buffer to broadcast
. leafmtype - memory type of leafdata
- op - operation to use for reduction

  Output Parameter:
. leafdata - buffer to be reduced with values from each leaf's respective root

  Level: intermediate

.seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
@*/
PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
  /* Caller supplied the memory types, so no PetscGetMemType() detection is needed here */
  PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
  PetscSFBcastEnd - end a broadcast & reduce operation started with `PetscSFBcastBegin()`

  Collective

  Input Parameters:
+ sf - star forest
. unit - data type
. rootdata - buffer to broadcast
- op - operation to use for reduction

  Output Parameter:
. leafdata - buffer to be reduced with values from each leaf's respective root

  Level: intermediate

.seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
@*/
PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
  PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
  PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()`

  Collective

  Input Parameters:
+ sf - star forest
. unit - data type
. leafdata - values to reduce
- op - reduction operation

  Output Parameter:
. rootdata - result of reduction of values from all leaves of each root

  Level: intermediate

  Notes:
  When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
  are host pointers or device pointers, which may incur a noticeable cost. If you already know the info, you should
  use `PetscSFReduceWithMemTypeBegin()` instead.

.seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`
@*/
PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
{
  PetscMemType rootmtype, leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
  PetscCall(PetscGetMemType(rootdata, &rootmtype));
  PetscCall(PetscGetMemType(leafdata, &leafmtype));
  /* Direct call through the ops table rather than PetscUseTypeMethod (kept byte-identical) */
  PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
  PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()`

  Collective

  Input Parameters:
+ sf - star forest
. unit - data type
. leafmtype - memory type of leafdata
. leafdata - values to reduce
. rootmtype - memory type of rootdata
- op - reduction operation

  Output Parameter:
. rootdata - result of reduction of values from all leaves of each root

  Level: intermediate

.seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`
@*/
PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
  PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
  PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()`

  Collective

  Input Parameters:
+ sf - star forest
. unit - data type
. leafdata - values to reduce
- op - reduction operation

  Output Parameter:
.
rootdata - result of reduction of values from all leaves of each root 1564 1565 Level: intermediate 1566 1567 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()` 1568 @*/ 1569 PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1570 { 1571 PetscFunctionBegin; 1572 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1573 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1574 PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op); 1575 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1576 PetscFunctionReturn(0); 1577 } 1578 1579 /*@C 1580 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, 1581 to be completed with `PetscSFFetchAndOpEnd()` 1582 1583 Collective 1584 1585 Input Parameters: 1586 + sf - star forest 1587 . unit - data type 1588 . leafdata - leaf values to use in reduction 1589 - op - operation to use for reduction 1590 1591 Output Parameters: 1592 + rootdata - root values to be updated, input state is seen by first process to perform an update 1593 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1594 1595 Level: advanced 1596 1597 Note: 1598 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1599 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1600 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1601 integers. 
1602 1603 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()` 1604 @*/ 1605 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1606 { 1607 PetscMemType rootmtype, leafmtype, leafupdatemtype; 1608 1609 PetscFunctionBegin; 1610 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1611 PetscCall(PetscSFSetUp(sf)); 1612 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1613 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1614 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1615 PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype)); 1616 PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1617 PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 1618 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1619 PetscFunctionReturn(0); 1620 } 1621 1622 /*@C 1623 PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by 1624 applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()` 1625 1626 Collective 1627 1628 Input Parameters: 1629 + sf - star forest 1630 . unit - data type 1631 . rootmtype - memory type of rootdata 1632 . leafmtype - memory type of leafdata 1633 . leafdata - leaf values to use in reduction 1634 . leafupdatemtype - memory type of leafupdate 1635 - op - operation to use for reduction 1636 1637 Output Parameters: 1638 + rootdata - root values to be updated, input state is seen by first process to perform an update 1639 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1640 1641 Level: advanced 1642 1643 Note: 1644 See `PetscSFFetchAndOpBegin()` for more details. 

.seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  /* Unlike PetscSFFetchAndOpBegin(), the memory types are provided by the caller instead of being
     queried with PetscGetMemType(); the same-memtype restriction on leafdata/leafupdate still applies */
  PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
  PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
  PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

  Collective

  Input Parameters:
+ sf - star forest
. unit - data type
.
 leafdata - leaf values to use in reduction
- op - operation to use for reduction

  Output Parameters:
+ rootdata - root values to be updated, input state is seen by first process to perform an update
- leafupdate - state at each leaf's respective root immediately prior to my atomic update

  Level: advanced

.seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
  /* complete the communication started by PetscSFFetchAndOpBegin()/PetscSFFetchAndOpWithMemTypeBegin() */
  PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
  PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()`

  Collective

  Input Parameter:
. sf - star forest

  Output Parameter:
. degree - degree of each root vertex

  Level: advanced

  Note:
  The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence no need to call `PetscFree()` on it.

.seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt **degree)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscValidPointer(degree, 2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;

    PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf - sf->minleaf + 1; /* size of the contiguous local leaf range */
    PetscCall(PetscMalloc1(nroots, &sf->degree));
    PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i = 0; i < nroots; i++) sf->degree[i] = 0;
    for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
    /* Each leaf contributes 1 to its root, so an MPI_SUM reduce yields the root degrees.
       degreetmp is offset by -minleaf so it can be indexed by absolute leaf numbers */
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
  }
  /* the result is only available after PetscSFComputeDegreeEnd() */
  *degree = NULL;
  PetscFunctionReturn(0);
}

/*@C
  PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`

  Collective

  Input Parameter:
. sf - star forest

  Output Parameter:
. degree - degree of each root vertex

  Level: developer

  Note:
  The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence no need to call `PetscFree()` on it.

.seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscValidPointer(degree, 2);
  if (!sf->degreeknown) {
    PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    /* finish the leaf-to-root MPI_SUM started in PetscSFComputeDegreeBegin(); degreetmp uses the
       same -minleaf offset as in the Begin call */
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
    PetscCall(PetscFree(sf->degreetmp));
    sf->degreeknown = PETSC_TRUE; /* cache the result; later calls return sf->degree directly */
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}

/*@C
  PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by `PetscSFGetMultiSF()`).
  Each multi-root is assigned index of the corresponding original root.

  Collective

  Input Parameters:
+ sf - star forest
- degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()`

  Output Parameters:
+ nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
- multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

  Level: developer

  Note:
  The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with `PetscFree()` when no longer needed.
1779 1780 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()` 1781 @*/ 1782 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1783 { 1784 PetscSF msf; 1785 PetscInt i, j, k, nroots, nmroots; 1786 1787 PetscFunctionBegin; 1788 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1789 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL)); 1790 if (nroots) PetscValidIntPointer(degree, 2); 1791 if (nMultiRoots) PetscValidIntPointer(nMultiRoots, 3); 1792 PetscValidPointer(multiRootsOrigNumbering, 4); 1793 PetscCall(PetscSFGetMultiSF(sf, &msf)); 1794 PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL)); 1795 PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering)); 1796 for (i = 0, j = 0, k = 0; i < nroots; i++) { 1797 if (!degree[i]) continue; 1798 for (j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i; 1799 } 1800 PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail"); 1801 if (nMultiRoots) *nMultiRoots = nmroots; 1802 PetscFunctionReturn(0); 1803 } 1804 1805 /*@C 1806 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()` 1807 1808 Collective 1809 1810 Input Parameters: 1811 + sf - star forest 1812 . unit - data type 1813 - leafdata - leaf data to gather to roots 1814 1815 Output Parameter: 1816 . 
 multirootdata - root buffer to gather into, amount of space per root is equal to its degree

  Level: intermediate

.seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
@*/
PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  /* A gather is a reduce with MPI_REPLACE over the multi-SF: each root provides one slot per
     incoming leaf (its degree), so every leaf value is stored without conflicts */
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
  PetscFunctionReturn(0);
}

/*@C
  PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()`

  Collective

  Input Parameters:
+ sf - star forest
. unit - data type
- leafdata - leaf data to gather to roots

  Output Parameter:
. multirootdata - root buffer to gather into, amount of space per root is equal to its degree

  Level: intermediate

.seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
@*/
PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* complete the MPI_REPLACE reduce over the multi-SF started in PetscSFGatherBegin() */
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
  PetscFunctionReturn(0);
}

/*@C
  PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()`

  Collective

  Input Parameters:
+ sf - star forest
. unit - data type
- multirootdata - root buffer to send to each leaf, one unit of data per leaf

  Output Parameter:
.
 leafdata - leaf data to be updated with personal data from each respective root

  Level: intermediate

.seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  /* A scatter is a broadcast over the multi-SF, sending each leaf its personal multi-root slot */
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
  PetscFunctionReturn(0);
}

/*@C
  PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`

  Collective

  Input Parameters:
+ sf - star forest
. unit - data type
- multirootdata - root buffer to send to each leaf, one unit of data per leaf

  Output Parameter:
. leafdata - leaf data to be updated with personal data from each respective root

  Level: intermediate

.seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* complete the broadcast over the multi-SF started in PetscSFScatterBegin() */
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
  PetscFunctionReturn(0);
}

/* Debug-only check that no leaf index appears twice in the SF's graph (i.e. it is a true star
   forest); compiles to a no-op in optimized builds */
static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
{
  PetscInt        i, n, nleaves;
  const PetscInt *ilocal = NULL;
  PetscHSetI      seen;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) {
    PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
    PetscCall(PetscHSetICreate(&seen));
    for (i = 0; i < nleaves; i++) {
      const PetscInt leaf = ilocal ? ilocal[i] : i; /* ilocal == NULL means contiguous leaves 0..nleaves-1 */
      PetscCall(PetscHSetIAdd(seen, leaf));
    }
    /* the set collapses duplicates, so a smaller size than nleaves means repeated leaves */
    PetscCall(PetscHSetIGetSize(seen, &n));
    PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
    PetscCall(PetscHSetIDestroy(&seen));
  }
  PetscFunctionReturn(0);
}

/*@
  PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view

  Input Parameters:
+ sfA - The first `PetscSF`
- sfB - The second `PetscSF`

  Output Parameters:
. sfBA - The composite `PetscSF`

  Level: developer

  Notes:
  Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots.

  sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
  nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
  Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA, *remotePointsB;
  PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
  const PetscInt    *localPointsA, *localPointsB;
  PetscInt          *localPointsBA;
  PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
  PetscBool          denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sfA, 1);
  PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
  PetscSFCheckGraphSet(sfB, 2);
  PetscCheckSameComm(sfA, 1, sfB, 2);
  PetscValidPointer(sfBA, 3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
  /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size */
  /* numRootsB; otherwise, garbage will be broadcasted. */
  /* Example (comm size = 1): */
  /* sfA: 0 <- (0, 0) */
  /* sfB: 100 <- (0, 0) */
  /*      101 <- (0, 1) */
  /* Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target */
  /* of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would */
  /* receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on */
  /* remotePointsA; if not recast, point 101 would receive a garbage value. */
  PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
  for (i = 0; i < numRootsB; i++) {
    reorderedRemotePointsA[i].rank  = -1; /* rank == -1 marks "no root of sfA reaches this point" */
    reorderedRemotePointsA[i].index = -1;
  }
  /* scatter sfA's remote roots into the slots indexed by sfA's leaf numbers (== sfB's root numbers) */
  for (i = 0; i < numLeavesA; i++) {
    PetscInt localp = localPointsA ? localPointsA[i] : i;

    if (localp >= numRootsB) continue; /* sfA leaves outside sfB's root space cannot be composed */
    reorderedRemotePointsA[localp] = remotePointsA[i];
  }
  remotePointsA = reorderedRemotePointsA;
  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
  for (i = 0; i < maxleaf - minleaf + 1; i++) {
    leafdataB[i].rank  = -1;
    leafdataB[i].index = -1;
  }
  /* Broadcast the (reordered) root locations of sfA down sfB; leaves of sfB without a path from a
     root of sfA keep the (-1, -1) sentinel. leafdataB is offset by -minleaf so it can be indexed
     by absolute leaf numbers */
  PetscCall(PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE));
  PetscCall(PetscFree(reorderedRemotePointsA));

  /* denseB: sfB's leaves are contiguous and every one of them is connected; then leafdataB can be
     used directly as the composed remote points */
  denseB = (PetscBool)!localPointsB;
  for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership of leafdataB passes to the new SF via PETSC_OWN_POINTER below */
  } else {
    /* compact: keep only the connected leaves */
    PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
    PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
    for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l - minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
      localPointsBA[numLeavesBA]  = l;
      numLeavesBA++;
    }
    PetscCall(PetscFree(leafdataB));
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
  PetscFunctionReturn(0);
}

/*@
  PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one

  Input Parameters:
+ sfA - The first `PetscSF`
- sfB - The second `PetscSF`

  Output Parameters:
. sfBA - The composite `PetscSF`.

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
  second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

  sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
  roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
  a Reduce on sfB, on connected roots.

.seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA, *remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA, *localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
  MPI_Op             op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sfA, 1);
  PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
  PetscSFCheckGraphSet(sfB, 2);
  PetscCheckSameComm(sfA, 1, sfB, 2);
  PetscValidPointer(sfBA, 3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
  */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
  if (iswin) op = MPI_REPLACE;
#endif

  /* Index sfA's remote roots by sfB's leaf numbers, with (-1, -1) marking leaves of sfB not
     covered by a leaf of sfA; the array is offset by -minleaf below for absolute indexing */
  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
  for (i = 0; i < maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank  = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i = 0; i < numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    for (i = 0; i < numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
  PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
  for (i = 0; i < numRootsB; i++) {
    remotePointsBA[i].rank  = -1; /* sentinel: root of sfB not reached from any root of sfA */
    remotePointsBA[i].index = -1;
  }

  /* Reduce sfA's root locations up sfB (i.e. along the inverse of sfB) onto sfB's roots */
  PetscCall(PetscSFReduceBegin(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op));
  PetscCall(PetscSFReduceEnd(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op));
  PetscCall(PetscFree(reorderedRemotePointsA));
  /* compact in place, dropping unconnected roots of sfB */
  for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA]        = i;
    numLeavesBA++;
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
  PetscFunctionReturn(0);
}

/*
  PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`

  Input Parameters:
. sf - The global `PetscSF`

  Output Parameters:
. out - The local `PetscSF`

.seealso: `PetscSF`, `PetscSFCreate()`
*/
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
{
  MPI_Comm           comm;
  PetscMPIInt        myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
  PetscSFNode       *liremote;
  PetscSF            lsf;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
  else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
    PetscCallMPI(MPI_Comm_rank(comm, &myrank));

    /* Find out local edges and build a local SF */
    PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
    /* first pass: count leaves whose root lives on this rank */
    for (i = lnleaves = 0; i < nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) lnleaves++;
    }
    PetscCall(PetscMalloc1(lnleaves, &lilocal));
    PetscCall(PetscMalloc1(lnleaves, &liremote));

    /* second pass: copy those edges, rewriting the root rank to 0 for the self communicator */
    for (i = j = 0; i < nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
    PetscCall(PetscSFSetFromOptions(lsf));
    PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(lsf));
    *out = lsf;
  }
  PetscFunctionReturn(0);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
{
  PetscMemType rootmtype, leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  /* logged under the regular BcastBegin event since there is no dedicated event for BcastToZero */
  PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscCall(PetscGetMemType(rootdata, &rootmtype));
  PetscCall(PetscGetMemType(leafdata, &leafmtype));
  PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
  PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@
  PetscSFConcatenate - concatenate multiple `PetscSF` into one

  Input Parameters:
+ comm - the communicator
. nsfs - the number of input `PetscSF`
. sfs - the array of input `PetscSF`
. rootMode - the root mode specifying how roots are handled
- leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or NULL for contiguous storage

  Output Parameters:
. newsf - The resulting `PetscSF`

  Level: advanced

  Notes:
  The communicator of all SFs in sfs must be comm.

  Leaves are always concatenated locally, keeping them ordered by the input SF index and original local order.
  The offsets in leafOffsets are added to the original leaf indices.
  If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well.
  In this case, NULL is also passed as ilocal to the resulting SF.
  If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs.
  In this case, the user is responsible for providing correct offsets so that the resulting leaves are unique (otherwise an error occurs).

  All root modes retain the essential connectivity condition:
  If two leaves of the same input SF are connected (sharing the same root), they are also connected in the output SF.
  Parameter rootMode controls how the input root spaces are combined.
  For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input SF (checked in debug mode) and is also the same in the output SF.
  For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined.
  `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally;
  roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input SF and original local index, and renumbered contiguously.
  `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally;
  roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input SF index and original global index, and renumbered contiguously;
  the original root ranks are ignored.
  For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`,
  the output SF's root layout is such that the local number of roots is a sum of the input SF's local numbers of roots on each rank to keep the load balancing.
  However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, that means roots can move to different ranks.

  Example:
  We can use src/vec/is/sf/tests/ex18.c to compare the root modes.
By running 2257 $ make -C $PETSC_DIR/src/vec/is/sf/tests ex18 2258 $ for m in {local,global,shared}; do 2259 $ mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view 2260 $ done 2261 we generate two identical SFs sf_0 and sf_1, 2262 $ PetscSF Object: sf_0 2 MPI processes 2263 $ type: basic 2264 $ rank #leaves #roots 2265 $ [ 0] 4 2 2266 $ [ 1] 4 2 2267 $ leaves roots roots in global numbering 2268 $ ( 0, 0) <- ( 0, 0) = 0 2269 $ ( 0, 1) <- ( 0, 1) = 1 2270 $ ( 0, 2) <- ( 1, 0) = 2 2271 $ ( 0, 3) <- ( 1, 1) = 3 2272 $ ( 1, 0) <- ( 0, 0) = 0 2273 $ ( 1, 1) <- ( 0, 1) = 1 2274 $ ( 1, 2) <- ( 1, 0) = 2 2275 $ ( 1, 3) <- ( 1, 1) = 3 2276 and pass them to `PetscSFConcatenate()` along with different choices of rootMode, yielding different result_sf: 2277 $ rootMode = local: 2278 $ PetscSF Object: result_sf 2 MPI processes 2279 $ type: basic 2280 $ rank #leaves #roots 2281 $ [ 0] 8 4 2282 $ [ 1] 8 4 2283 $ leaves roots roots in global numbering 2284 $ ( 0, 0) <- ( 0, 0) = 0 2285 $ ( 0, 1) <- ( 0, 1) = 1 2286 $ ( 0, 2) <- ( 1, 0) = 4 2287 $ ( 0, 3) <- ( 1, 1) = 5 2288 $ ( 0, 4) <- ( 0, 2) = 2 2289 $ ( 0, 5) <- ( 0, 3) = 3 2290 $ ( 0, 6) <- ( 1, 2) = 6 2291 $ ( 0, 7) <- ( 1, 3) = 7 2292 $ ( 1, 0) <- ( 0, 0) = 0 2293 $ ( 1, 1) <- ( 0, 1) = 1 2294 $ ( 1, 2) <- ( 1, 0) = 4 2295 $ ( 1, 3) <- ( 1, 1) = 5 2296 $ ( 1, 4) <- ( 0, 2) = 2 2297 $ ( 1, 5) <- ( 0, 3) = 3 2298 $ ( 1, 6) <- ( 1, 2) = 6 2299 $ ( 1, 7) <- ( 1, 3) = 7 2300 $ 2301 $ rootMode = global: 2302 $ PetscSF Object: result_sf 2 MPI processes 2303 $ type: basic 2304 $ rank #leaves #roots 2305 $ [ 0] 8 4 2306 $ [ 1] 8 4 2307 $ leaves roots roots in global numbering 2308 $ ( 0, 0) <- ( 0, 0) = 0 2309 $ ( 0, 1) <- ( 0, 1) = 1 2310 $ ( 0, 2) <- ( 0, 2) = 2 2311 $ ( 0, 3) <- ( 0, 3) = 3 2312 $ ( 0, 4) <- ( 1, 0) = 4 2313 $ ( 0, 5) <- ( 1, 1) = 5 2314 $ ( 0, 6) <- ( 1, 2) = 6 2315 $ ( 0, 7) <- ( 1, 3) = 7 2316 $ ( 1, 0) <- ( 0, 0) = 0 2317 $ ( 1, 1) <- ( 0, 1) = 1 2318 $ ( 1, 2) <- ( 0, 2) = 2 
2319 $ ( 1, 3) <- ( 0, 3) = 3 2320 $ ( 1, 4) <- ( 1, 0) = 4 2321 $ ( 1, 5) <- ( 1, 1) = 5 2322 $ ( 1, 6) <- ( 1, 2) = 6 2323 $ ( 1, 7) <- ( 1, 3) = 7 2324 $ 2325 $ rootMode = shared: 2326 $ PetscSF Object: result_sf 2 MPI processes 2327 $ type: basic 2328 $ rank #leaves #roots 2329 $ [ 0] 8 2 2330 $ [ 1] 8 2 2331 $ leaves roots roots in global numbering 2332 $ ( 0, 0) <- ( 0, 0) = 0 2333 $ ( 0, 1) <- ( 0, 1) = 1 2334 $ ( 0, 2) <- ( 1, 0) = 2 2335 $ ( 0, 3) <- ( 1, 1) = 3 2336 $ ( 0, 4) <- ( 0, 0) = 0 2337 $ ( 0, 5) <- ( 0, 1) = 1 2338 $ ( 0, 6) <- ( 1, 0) = 2 2339 $ ( 0, 7) <- ( 1, 1) = 3 2340 $ ( 1, 0) <- ( 0, 0) = 0 2341 $ ( 1, 1) <- ( 0, 1) = 1 2342 $ ( 1, 2) <- ( 1, 0) = 2 2343 $ ( 1, 3) <- ( 1, 1) = 3 2344 $ ( 1, 4) <- ( 0, 0) = 0 2345 $ ( 1, 5) <- ( 0, 1) = 1 2346 $ ( 1, 6) <- ( 1, 0) = 2 2347 $ ( 1, 7) <- ( 1, 1) = 3 2348 2349 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode` 2350 @*/ 2351 PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf) 2352 { 2353 PetscInt i, s, nLeaves, nRoots; 2354 PetscInt *leafArrayOffsets; 2355 PetscInt *ilocal_new; 2356 PetscSFNode *iremote_new; 2357 PetscBool all_ilocal_null = PETSC_FALSE; 2358 PetscLayout glayout = NULL; 2359 PetscInt *gremote = NULL; 2360 PetscMPIInt rank, size; 2361 2362 PetscFunctionBegin; 2363 { 2364 PetscSF dummy; /* just to have a PetscObject on comm for input validation */ 2365 2366 PetscCall(PetscSFCreate(comm, &dummy)); 2367 PetscValidLogicalCollectiveInt(dummy, nsfs, 2); 2368 PetscValidPointer(sfs, 3); 2369 for (i = 0; i < nsfs; i++) { 2370 PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3); 2371 PetscCheckSameComm(dummy, 1, sfs[i], 3); 2372 } 2373 PetscValidLogicalCollectiveEnum(dummy, rootMode, 4); 2374 if (leafOffsets) PetscValidIntPointer(leafOffsets, 5); 2375 PetscValidPointer(newsf, 6); 2376 PetscCall(PetscSFDestroy(&dummy)); 2377 
} 2378 if (!nsfs) { 2379 PetscCall(PetscSFCreate(comm, newsf)); 2380 PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 2381 PetscFunctionReturn(0); 2382 } 2383 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2384 PetscCallMPI(MPI_Comm_size(comm, &size)); 2385 2386 /* Calculate leaf array offsets */ 2387 PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets)); 2388 leafArrayOffsets[0] = 0; 2389 for (s = 0; s < nsfs; s++) { 2390 PetscInt nl; 2391 2392 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL)); 2393 leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl; 2394 } 2395 nLeaves = leafArrayOffsets[nsfs]; 2396 2397 /* Calculate number of roots */ 2398 switch (rootMode) { 2399 case PETSCSF_CONCATENATE_ROOTMODE_SHARED: { 2400 PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL)); 2401 if (PetscDefined(USE_DEBUG)) { 2402 for (s = 1; s < nsfs; s++) { 2403 PetscInt nr; 2404 2405 PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL)); 2406 PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots); 2407 } 2408 } 2409 } break; 2410 case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: { 2411 /* Calculate also global layout in this case */ 2412 PetscInt *nls; 2413 PetscLayout *lts; 2414 PetscInt **inds; 2415 PetscInt j; 2416 PetscInt rootOffset = 0; 2417 2418 PetscCall(PetscCalloc3(nsfs, <s, nsfs, &nls, nsfs, &inds)); 2419 PetscCall(PetscLayoutCreate(comm, &glayout)); 2420 glayout->bs = 1; 2421 glayout->n = 0; 2422 glayout->N = 0; 2423 for (s = 0; s < nsfs; s++) { 2424 PetscCall(PetscSFGetGraphLayout(sfs[s], <s[s], &nls[s], NULL, &inds[s])); 2425 glayout->n += lts[s]->n; 2426 glayout->N += lts[s]->N; 2427 } 2428 PetscCall(PetscLayoutSetUp(glayout)); 2429 PetscCall(PetscMalloc1(nLeaves, &gremote)); 2430 for (s = 0, j = 0; s < nsfs; s++) { 2431 for (i = 0; i < nls[s]; 
i++, j++) gremote[j] = inds[s][i] + rootOffset; 2432 rootOffset += lts[s]->N; 2433 PetscCall(PetscLayoutDestroy(<s[s])); 2434 PetscCall(PetscFree(inds[s])); 2435 } 2436 PetscCall(PetscFree3(lts, nls, inds)); 2437 nRoots = glayout->N; 2438 } break; 2439 case PETSCSF_CONCATENATE_ROOTMODE_LOCAL: 2440 /* nRoots calculated later in this case */ 2441 break; 2442 default: 2443 SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode); 2444 } 2445 2446 if (!leafOffsets) { 2447 all_ilocal_null = PETSC_TRUE; 2448 for (s = 0; s < nsfs; s++) { 2449 const PetscInt *ilocal; 2450 2451 PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL)); 2452 if (ilocal) { 2453 all_ilocal_null = PETSC_FALSE; 2454 break; 2455 } 2456 } 2457 PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL"); 2458 } 2459 2460 /* Renumber and concatenate local leaves */ 2461 ilocal_new = NULL; 2462 if (!all_ilocal_null) { 2463 PetscCall(PetscMalloc1(nLeaves, &ilocal_new)); 2464 for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1; 2465 for (s = 0; s < nsfs; s++) { 2466 const PetscInt *ilocal; 2467 PetscInt *ilocal_l = &ilocal_new[leafArrayOffsets[s]]; 2468 PetscInt i, nleaves_l; 2469 2470 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL)); 2471 for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? 
ilocal[i] : i) + leafOffsets[s]; 2472 } 2473 } 2474 2475 /* Renumber and concatenate remote roots */ 2476 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) { 2477 PetscInt rootOffset = 0; 2478 2479 PetscCall(PetscMalloc1(nLeaves, &iremote_new)); 2480 for (i = 0; i < nLeaves; i++) { 2481 iremote_new[i].rank = -1; 2482 iremote_new[i].index = -1; 2483 } 2484 for (s = 0; s < nsfs; s++) { 2485 PetscInt i, nl, nr; 2486 PetscSF tmp_sf; 2487 const PetscSFNode *iremote; 2488 PetscSFNode *tmp_rootdata; 2489 PetscSFNode *tmp_leafdata = &iremote_new[leafArrayOffsets[s]]; 2490 2491 PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote)); 2492 PetscCall(PetscSFCreate(comm, &tmp_sf)); 2493 /* create helper SF with contiguous leaves */ 2494 PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 2495 PetscCall(PetscSFSetUp(tmp_sf)); 2496 PetscCall(PetscMalloc1(nr, &tmp_rootdata)); 2497 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) { 2498 for (i = 0; i < nr; i++) { 2499 tmp_rootdata[i].index = i + rootOffset; 2500 tmp_rootdata[i].rank = (PetscInt)rank; 2501 } 2502 rootOffset += nr; 2503 } else { 2504 for (i = 0; i < nr; i++) { 2505 tmp_rootdata[i].index = i; 2506 tmp_rootdata[i].rank = (PetscInt)rank; 2507 } 2508 } 2509 PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2510 PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2511 PetscCall(PetscSFDestroy(&tmp_sf)); 2512 PetscCall(PetscFree(tmp_rootdata)); 2513 } 2514 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) { nRoots = rootOffset; } // else nRoots already calculated above 2515 2516 /* Build the new SF */ 2517 PetscCall(PetscSFCreate(comm, newsf)); 2518 PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER)); 2519 } else { 2520 /* Build the new SF */ 2521 
PetscCall(PetscSFCreate(comm, newsf)); 2522 PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote)); 2523 } 2524 PetscCall(PetscSFSetUp(*newsf)); 2525 PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view")); 2526 PetscCall(PetscLayoutDestroy(&glayout)); 2527 PetscCall(PetscFree(gremote)); 2528 PetscCall(PetscFree(leafArrayOffsets)); 2529 PetscFunctionReturn(0); 2530 } 2531