1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/hashseti.h> 3 #include <petsc/private/viewerimpl.h> 4 #include <petsc/private/hashmapi.h> 5 6 #if defined(PETSC_HAVE_CUDA) 7 #include <cuda_runtime.h> 8 #endif 9 10 #if defined(PETSC_HAVE_HIP) 11 #include <hip/hip_runtime.h> 12 #endif 13 14 #if defined(PETSC_CLANG_STATIC_ANALYZER) 15 void PetscSFCheckGraphSet(PetscSF, int); 16 #else 17 #if defined(PETSC_USE_DEBUG) 18 #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME); 19 #else 20 #define PetscSFCheckGraphSet(sf, arg) \ 21 do { \ 22 } while (0) 23 #endif 24 #endif 25 26 const char *const PetscSFDuplicateOptions[] = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL}; 27 const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL}; 28 29 /*@ 30 PetscSFCreate - create a star forest communication context 31 32 Collective 33 34 Input Parameter: 35 . comm - communicator on which the star forest will operate 36 37 Output Parameter: 38 . sf - new star forest context 39 40 Options Database Keys: 41 . -sf_type type - value of type may be 42 .vb 43 basic -Use MPI persistent Isend/Irecv for communication (Default) 44 window -Use MPI-3 one-sided window for communication 45 neighbor -Use MPI-3 neighborhood collectives for communication 46 .ve 47 48 Level: intermediate 49 50 Note: 51 When one knows the communication graph is one of the predefined graphs, such as `MPI_Alltoall()`, `MPI_Allgatherv()`, 52 `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special 53 `SF`s are optimized and have better performance than general `SF`s.
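   Example Usage:
   A minimal sketch of the typical call sequence; the one-leaf-per-rank graph below (every rank's single leaf referencing root 0 of rank 0) is purely illustrative:
.vb
   PetscSF     sf;
   PetscSFNode remote;

   PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
   remote.rank  = 0; /* the referenced root lives on rank 0 */
   remote.index = 0; /* ... at offset 0 in that rank's root space */
   PetscCall(PetscSFSetGraph(sf, 1, 1, NULL, PETSC_COPY_VALUES, &remote, PETSC_COPY_VALUES));
   PetscCall(PetscSFSetFromOptions(sf));
   PetscCall(PetscSFSetUp(sf));
   /* communicate with PetscSFBcastBegin()/PetscSFBcastEnd(), PetscSFReduceBegin()/PetscSFReduceEnd(), ... */
   PetscCall(PetscSFDestroy(&sf));
.ve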
54 55 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()` 56 @*/ 57 PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf) 58 { 59 PetscSF b; 60 61 PetscFunctionBegin; 62 PetscValidPointer(sf, 2); 63 PetscCall(PetscSFInitializePackage()); 64 65 PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView)); 66 67 b->nroots = -1; 68 b->nleaves = -1; 69 b->minleaf = PETSC_MAX_INT; 70 b->maxleaf = PETSC_MIN_INT; 71 b->nranks = -1; 72 b->rankorder = PETSC_TRUE; 73 b->ingroup = MPI_GROUP_NULL; 74 b->outgroup = MPI_GROUP_NULL; 75 b->graphset = PETSC_FALSE; 76 #if defined(PETSC_HAVE_DEVICE) 77 b->use_gpu_aware_mpi = use_gpu_aware_mpi; 78 b->use_stream_aware_mpi = PETSC_FALSE; 79 b->unknown_input_stream = PETSC_FALSE; 80 #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 81 b->backend = PETSCSF_BACKEND_KOKKOS; 82 #elif defined(PETSC_HAVE_CUDA) 83 b->backend = PETSCSF_BACKEND_CUDA; 84 #elif defined(PETSC_HAVE_HIP) 85 b->backend = PETSCSF_BACKEND_HIP; 86 #endif 87 88 #if defined(PETSC_HAVE_NVSHMEM) 89 b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */ 90 b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */ 91 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL)); 92 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL)); 93 #endif 94 #endif 95 b->vscat.from_n = -1; 96 b->vscat.to_n = -1; 97 b->vscat.unit = MPIU_SCALAR; 98 *sf = b; 99 PetscFunctionReturn(0); 100 } 101 102 /*@ 103 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 104 105 Collective 106 107 Input Parameter: 108 . sf - star forest 109 110 Level: advanced 111 112 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()` 113 @*/ 114 PetscErrorCode PetscSFReset(PetscSF sf) 115 { 116 PetscFunctionBegin; 117 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 118 PetscTryTypeMethod(sf, Reset); 119 sf->nroots = -1; 120 sf->nleaves = -1; 121 sf->minleaf = PETSC_MAX_INT; 122 sf->maxleaf = PETSC_MIN_INT; 123 sf->mine = NULL; 124 sf->remote = NULL; 125 sf->graphset = PETSC_FALSE; 126 PetscCall(PetscFree(sf->mine_alloc)); 127 PetscCall(PetscFree(sf->remote_alloc)); 128 sf->nranks = -1; 129 PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote)); 130 sf->degreeknown = PETSC_FALSE; 131 PetscCall(PetscFree(sf->degree)); 132 if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup)); 133 if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup)); 134 if (sf->multi) sf->multi->multi = NULL; 135 PetscCall(PetscSFDestroy(&sf->multi)); 136 PetscCall(PetscLayoutDestroy(&sf->map)); 137 138 #if defined(PETSC_HAVE_DEVICE) 139 for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i])); 140 #endif 141 142 sf->setupcalled = PETSC_FALSE; 143 PetscFunctionReturn(0); 144 } 145 146 /*@C 147 PetscSFSetType - Set the `PetscSF` communication implementation 148 149 Collective 150 151 Input Parameters: 152 + sf - the `PetscSF` context 153 - type - a known method 154 .vb 155 PETSCSFWINDOW - MPI-2/3 one-sided 156 PETSCSFBASIC - basic implementation using MPI-1 two-sided 157 .ve 158 159 Options Database Key: 160 . 
-sf_type <type> - Sets the method; use -help for a list of available methods (for instance, basic, window, neighbor) 161 162 Level: intermediate 163 164 Notes: 165 See "include/petscsf.h" for available methods 166 167 .seealso: `PetscSFType`, `PetscSFCreate()` 168 @*/ 169 PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type) 170 { 171 PetscBool match; 172 PetscErrorCode (*r)(PetscSF); 173 174 PetscFunctionBegin; 175 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 176 PetscValidCharPointer(type, 2); 177 178 PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match)); 179 if (match) PetscFunctionReturn(0); 180 181 PetscCall(PetscFunctionListFind(PetscSFList, type, &r)); 182 PetscCheck(r, PETSC_COMM_SELF, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type); 183 /* Destroy the previous PetscSF implementation context */ 184 PetscTryTypeMethod(sf, Destroy); 185 PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops))); 186 PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type)); 187 PetscCall((*r)(sf)); 188 PetscFunctionReturn(0); 189 } 190 191 /*@C 192 PetscSFGetType - Get the `PetscSF` communication implementation 193 194 Not Collective 195 196 Input Parameter: 197 . sf - the `PetscSF` context 198 199 Output Parameter: 200 . type - the `PetscSF` type name 201 202 Level: intermediate 203 204 .seealso: `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()` 205 @*/ 206 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 207 { 208 PetscFunctionBegin; 209 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 210 PetscValidPointer(type, 2); 211 *type = ((PetscObject)sf)->type_name; 212 PetscFunctionReturn(0); 213 } 214 215 /*@C 216 PetscSFDestroy - destroy star forest 217 218 Collective 219 220 Input Parameter: 221 . sf - address of star forest 222 223 Level: intermediate 224 225 .seealso: `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()` 226 @*/ 227 PetscErrorCode PetscSFDestroy(PetscSF *sf) 228 { 229 PetscFunctionBegin; 230 if (!*sf) PetscFunctionReturn(0); 231 PetscValidHeaderSpecific((*sf), PETSCSF_CLASSID, 1); 232 if (--((PetscObject)(*sf))->refct > 0) { 233 *sf = NULL; 234 PetscFunctionReturn(0); 235 } 236 PetscCall(PetscSFReset(*sf)); 237 PetscTryTypeMethod((*sf), Destroy); 238 PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf)); 239 if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit)); 240 PetscCall(PetscHeaderDestroy(sf)); 241 PetscFunctionReturn(0); 242 } 243 244 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 245 { 246 PetscInt i, nleaves; 247 PetscMPIInt size; 248 const PetscInt *ilocal; 249 const PetscSFNode *iremote; 250 251 PetscFunctionBegin; 252 if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0); 253 PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote)); 254 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 255 for (i = 0; i < nleaves; i++) { 256 const PetscInt rank = iremote[i].rank; 257 const PetscInt remote = iremote[i].index; 258 const PetscInt leaf = ilocal ?
ilocal[i] : i; 259 PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size); 260 PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i); 261 PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i); 262 } 263 PetscFunctionReturn(0); 264 } 265 266 /*@ 267 PetscSFSetUp - set up communication structures 268 269 Collective 270 271 Input Parameter: 272 . sf - star forest communication object 273 274 Level: beginner 275 276 .seealso: `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()` 277 @*/ 278 PetscErrorCode PetscSFSetUp(PetscSF sf) 279 { 280 PetscFunctionBegin; 281 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 282 PetscSFCheckGraphSet(sf, 1); 283 if (sf->setupcalled) PetscFunctionReturn(0); 284 PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0)); 285 PetscCall(PetscSFCheckGraphValid_Private(sf)); 286 if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */ 287 PetscTryTypeMethod(sf, SetUp); 288 #if defined(PETSC_HAVE_CUDA) 289 if (sf->backend == PETSCSF_BACKEND_CUDA) { 290 sf->ops->Malloc = PetscSFMalloc_CUDA; 291 sf->ops->Free = PetscSFFree_CUDA; 292 } 293 #endif 294 #if defined(PETSC_HAVE_HIP) 295 if (sf->backend == PETSCSF_BACKEND_HIP) { 296 sf->ops->Malloc = PetscSFMalloc_HIP; 297 sf->ops->Free = PetscSFFree_HIP; 298 } 299 #endif 300 301 # 302 #if defined(PETSC_HAVE_KOKKOS) 303 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 304 sf->ops->Malloc = PetscSFMalloc_Kokkos; 305 sf->ops->Free = PetscSFFree_Kokkos; 306 } 307 #endif 308 PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0)); 309 sf->setupcalled = PETSC_TRUE; 310 PetscFunctionReturn(0); 311 } 312 313 /*@ 314 PetscSFSetFromOptions - set `PetscSF` options using the options database 315 316 Logically Collective 317 318 Input Parameter: 319 . sf - star forest 320 321 Options Database Keys: 322 + -sf_type - implementation type, see PetscSFSetType() 323 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 324 . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also 325 use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true). 326 If true, this option only works with -use_gpu_aware_mpi 1. 327 . -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false). 328 If true, this option only works with -use_gpu_aware_mpi 1. 329 330 - -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos. 331 On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices, 332 the only available is kokkos. 
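   Example Usage:
   A hypothetical command line exercising the options above (the program name and the chosen values are illustrative only):
.vb
   mpiexec -n 4 ./app -sf_type neighbor -sf_backend kokkos -sf_use_stream_aware_mpi 0
.ve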
333 334 Level: intermediate 335 336 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()` 337 @*/ 338 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 339 { 340 PetscSFType deft; 341 char type[256]; 342 PetscBool flg; 343 344 PetscFunctionBegin; 345 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 346 PetscObjectOptionsBegin((PetscObject)sf); 347 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 348 PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg)); 349 PetscCall(PetscSFSetType(sf, flg ? type : deft)); 350 PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL)); 351 #if defined(PETSC_HAVE_DEVICE) 352 { 353 char backendstr[32] = {0}; 354 PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set; 355 /* Change the defaults set in PetscSFCreate() with command line options */ 356 PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL)); 357 PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL)); 358 PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set)); 359 PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda)); 360 PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos)); 361 PetscCall(PetscStrcasecmp("hip", backendstr, &isHip)); 362 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 363 if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 364 else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 365 else if (isHip) sf->backend = PETSCSF_BACKEND_HIP; 366 else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr); 367 #elif defined(PETSC_HAVE_KOKKOS) 368 PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr); 369 #endif 370 } 371 #endif 372 PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject); 373 PetscOptionsEnd(); 374 PetscFunctionReturn(0); 375 } 376 377 /*@ 378 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 379 380 Logically Collective 381 382 Input Parameters: 383 + sf - star forest 384 - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic) 385 386 Level: advanced 387 388 .seealso: `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()` 389 @*/ 390 PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg) 391 { 392 PetscFunctionBegin; 393 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 394 PetscValidLogicalCollectiveBool(sf, flg, 2); 395 PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 396 sf->rankorder = flg; 397 PetscFunctionReturn(0); 398 } 399 400 /*@C 401 PetscSFSetGraph - Set a parallel star forest 402 403 Collective 404 405 Input Parameters: 406 + sf - star forest 407 . 
nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 408 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 409 . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced 410 during setup in debug mode) 411 . localmode - copy mode for ilocal 412 . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 413 during setup in debug mode) 414 - remotemode - copy mode for iremote 415 416 Level: intermediate 417 418 Notes: 419 Leaf indices in ilocal must be unique, otherwise an error occurs. 420 421 Input arrays ilocal and iremote follow the PetscCopyMode semantics. 422 In particular, if localmode/remotemode is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`, 423 PETSc might modify the respective array; 424 if `PETSC_USE_POINTER`, the user must delete the array after PetscSFDestroy(). 425 Only if `PETSC_COPY_VALUES` is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed). 426 427 Fortran Note: 428 In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode. 429 430 Developer Note: 431 We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf. 432 This also allows to compare leaf sets of two SFs easily. 433 434 .seealso: `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()` 435 @*/ 436 PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode) 437 { 438 PetscBool unique, contiguous; 439 440 PetscFunctionBegin; 441 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 442 if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal, 4); 443 if (nleaves > 0) PetscValidPointer(iremote, 6); 444 PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots); 445 PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves); 446 /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast 447 * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */ 448 PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode); 449 PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode); 450 451 if (sf->nroots >= 0) { /* Reset only if graph already set */ 452 PetscCall(PetscSFReset(sf)); 453 } 454 455 PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0)); 456 457 sf->nroots = nroots; 458 sf->nleaves = nleaves; 459 460 if (localmode == PETSC_COPY_VALUES && ilocal) { 461 PetscInt *tlocal = NULL; 462 463 PetscCall(PetscMalloc1(nleaves, &tlocal)); 464 PetscCall(PetscArraycpy(tlocal, ilocal, nleaves)); 465 ilocal = tlocal; 466 } 467 if (remotemode == PETSC_COPY_VALUES) { 468 PetscSFNode *tremote = NULL; 469 470 PetscCall(PetscMalloc1(nleaves, &tremote)); 471 PetscCall(PetscArraycpy(tremote, iremote, nleaves)); 472 iremote = tremote; 473 } 474 475 if (nleaves && ilocal) { 476 PetscSFNode work; 477 478 PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, 
sizeof(PetscSFNode), &work)); 479 PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique)); 480 unique = PetscNot(unique); 481 PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF"); 482 sf->minleaf = ilocal[0]; 483 sf->maxleaf = ilocal[nleaves - 1]; 484 contiguous = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1); 485 } else { 486 sf->minleaf = 0; 487 sf->maxleaf = nleaves - 1; 488 unique = PETSC_TRUE; 489 contiguous = PETSC_TRUE; 490 } 491 492 if (contiguous) { 493 if (localmode == PETSC_USE_POINTER) { 494 ilocal = NULL; 495 } else { 496 PetscCall(PetscFree(ilocal)); 497 } 498 } 499 sf->mine = ilocal; 500 if (localmode == PETSC_USE_POINTER) { 501 sf->mine_alloc = NULL; 502 } else { 503 sf->mine_alloc = ilocal; 504 } 505 sf->remote = iremote; 506 if (remotemode == PETSC_USE_POINTER) { 507 sf->remote_alloc = NULL; 508 } else { 509 sf->remote_alloc = iremote; 510 } 511 PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0)); 512 sf->graphset = PETSC_TRUE; 513 PetscFunctionReturn(0); 514 } 515 516 /*@ 517 PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern 518 519 Collective 520 521 Input Parameters: 522 + sf - The `PetscSF` 523 . map - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`) 524 - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL` 525 526 Level: intermediate 527 528 Notes: 529 It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector x whose layout is map, 530 and let n and N be the local and global sizes of x respectively. 531 532 With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph such that a Bcast on it copies x to 533 sequential vectors y on all ranks. 534 535 With `PETSCSF_PATTERN_GATHER`, the routine creates a graph such that a Bcast on it copies x to a 536 sequential vector y on rank 0. 537 538 In the above cases, entries of x are roots and entries of y are leaves. 539 540 With `PETSCSF_PATTERN_ALLTOALL`, map is insignificant. Suppose NP is the size of sf's communicator. The routine 541 creates a graph in which every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 542 of rank j, for 0 <= i,j < NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. This does 543 not mean one cannot send multiple items; one just needs to create a new MPI datatype for the multiple data 544 items with `MPI_Type_contiguous()` and use that as the <unit> argument in SF routines. 545 546 In this case, roots and leaves are symmetric.
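   Example Usage:
   A sketch, in the vector notation above, that gathers an MPI vector x into a caller-allocated array y of global length N on every rank; the use of VecGetLayout() and the y buffer are assumptions made for illustration:
.vb
   PetscSF            sf;
   PetscLayout        map;
   const PetscScalar *xarr;
   PetscScalar       *yarr; /* length N, allocated by the caller */

   PetscCall(VecGetLayout(x, &map));
   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)x), &sf));
   PetscCall(PetscSFSetGraphWithPattern(sf, map, PETSCSF_PATTERN_ALLGATHER));
   PetscCall(VecGetArrayRead(x, &xarr));
   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, xarr, yarr, MPI_REPLACE));
   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, xarr, yarr, MPI_REPLACE));
   PetscCall(VecRestoreArrayRead(x, &xarr));
   PetscCall(PetscSFDestroy(&sf));
.ve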
547 548 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()` 549 @*/ 550 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern) 551 { 552 MPI_Comm comm; 553 PetscInt n, N, res[2]; 554 PetscMPIInt rank, size; 555 PetscSFType type; 556 557 PetscFunctionBegin; 558 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 559 if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map, 2); 560 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 561 PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern); 562 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 563 PetscCallMPI(MPI_Comm_size(comm, &size)); 564 565 if (pattern == PETSCSF_PATTERN_ALLTOALL) { 566 type = PETSCSFALLTOALL; 567 PetscCall(PetscLayoutCreate(comm, &sf->map)); 568 PetscCall(PetscLayoutSetLocalSize(sf->map, size)); 569 PetscCall(PetscLayoutSetSize(sf->map, ((PetscInt)size) * size)); 570 PetscCall(PetscLayoutSetUp(sf->map)); 571 } else { 572 PetscCall(PetscLayoutGetLocalSize(map, &n)); 573 PetscCall(PetscLayoutGetSize(map, &N)); 574 res[0] = n; 575 res[1] = -n; 576 /* Check if n are same over all ranks so that we can optimize it */ 577 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm)); 578 if (res[0] == -res[1]) { /* same n */ 579 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 580 } else { 581 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 582 } 583 PetscCall(PetscLayoutReference(map, &sf->map)); 584 } 585 PetscCall(PetscSFSetType(sf, type)); 586 587 sf->pattern = pattern; 588 sf->mine = NULL; /* Contiguous */ 589 590 /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 591 Also set other easy stuff. 592 */ 593 if (pattern == PETSCSF_PATTERN_ALLGATHER) { 594 sf->nleaves = N; 595 sf->nroots = n; 596 sf->nranks = size; 597 sf->minleaf = 0; 598 sf->maxleaf = N - 1; 599 } else if (pattern == PETSCSF_PATTERN_GATHER) { 600 sf->nleaves = rank ? 0 : N; 601 sf->nroots = n; 602 sf->nranks = rank ? 0 : size; 603 sf->minleaf = 0; 604 sf->maxleaf = rank ? -1 : N - 1; 605 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 606 sf->nleaves = size; 607 sf->nroots = size; 608 sf->nranks = size; 609 sf->minleaf = 0; 610 sf->maxleaf = size - 1; 611 } 612 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 613 sf->graphset = PETSC_TRUE; 614 PetscFunctionReturn(0); 615 } 616 617 /*@ 618 PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map 619 620 Collective 621 622 Input Parameter: 623 . sf - star forest to invert 624 625 Output Parameter: 626 . isf - inverse of sf 627 628 Level: advanced 629 630 Notes: 631 All roots must have degree 1. 632 633 The local space may be a permutation, but cannot be sparse. 
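   Example Usage:
   A short sketch; the forward sf is assumed to already have a degree-1 graph set, and dataatleaves/dataatroots are hypothetical buffers sized to sf's leaf and root spaces:
.vb
   PetscSF isf;

   PetscCall(PetscSFCreateInverseSF(sf, &isf));
   /* in isf the roles are swapped: sf's leaf space holds isf's roots and sf's root space holds isf's leaves,
      so a Bcast on isf moves data from the original leaf space to the original root space */
   PetscCall(PetscSFBcastBegin(isf, MPIU_INT, dataatleaves, dataatroots, MPI_REPLACE));
   PetscCall(PetscSFBcastEnd(isf, MPIU_INT, dataatleaves, dataatroots, MPI_REPLACE));
   PetscCall(PetscSFDestroy(&isf));
.ve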
634 635 .seealso: `PetscSFType`, `PetscSFSetGraph()` 636 @*/ 637 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf) 638 { 639 PetscMPIInt rank; 640 PetscInt i, nroots, nleaves, maxlocal, count, *newilocal; 641 const PetscInt *ilocal; 642 PetscSFNode *roots, *leaves; 643 644 PetscFunctionBegin; 645 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 646 PetscSFCheckGraphSet(sf, 1); 647 PetscValidPointer(isf, 2); 648 649 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL)); 650 maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */ 651 652 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 653 PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves)); 654 for (i = 0; i < maxlocal; i++) { 655 leaves[i].rank = rank; 656 leaves[i].index = i; 657 } 658 for (i = 0; i < nroots; i++) { 659 roots[i].rank = -1; 660 roots[i].index = -1; 661 } 662 PetscCall(PetscSFReduceBegin(sf, MPIU_2INT, leaves, roots, MPI_REPLACE)); 663 PetscCall(PetscSFReduceEnd(sf, MPIU_2INT, leaves, roots, MPI_REPLACE)); 664 665 /* Check whether our leaves are sparse */ 666 for (i = 0, count = 0; i < nroots; i++) 667 if (roots[i].rank >= 0) count++; 668 if (count == nroots) newilocal = NULL; 669 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal)); 670 for (i = 0, count = 0; i < nroots; i++) { 671 if (roots[i].rank >= 0) { 672 newilocal[count] = i; 673 roots[count].rank = roots[i].rank; 674 roots[count].index = roots[i].index; 675 count++; 676 } 677 } 678 } 679 680 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf)); 681 PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES)); 682 PetscCall(PetscFree2(roots, leaves)); 683 PetscFunctionReturn(0); 684 } 685 686 /*@ 687 PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph 688 689 Collective 690 691 Input Parameters: 692 + sf - communication object to duplicate 693 - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`) 694 695 Output Parameter: 696 . 
newsf - new communication object 697 698 Level: beginner 699 700 .seealso: `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()` 701 @*/ 702 PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf) 703 { 704 PetscSFType type; 705 MPI_Datatype dtype = MPIU_SCALAR; 706 707 PetscFunctionBegin; 708 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 709 PetscValidLogicalCollectiveEnum(sf, opt, 2); 710 PetscValidPointer(newsf, 3); 711 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf)); 712 PetscCall(PetscSFGetType(sf, &type)); 713 if (type) PetscCall(PetscSFSetType(*newsf, type)); 714 (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag ealier since PetscSFSetGraph() below checks on this flag */ 715 if (opt == PETSCSF_DUPLICATE_GRAPH) { 716 PetscSFCheckGraphSet(sf, 1); 717 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 718 PetscInt nroots, nleaves; 719 const PetscInt *ilocal; 720 const PetscSFNode *iremote; 721 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 722 PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 723 } else { 724 PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern)); 725 } 726 } 727 /* Since oldtype is committed, so is newtype, according to MPI */ 728 if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype)); 729 (*newsf)->vscat.bs = sf->vscat.bs; 730 (*newsf)->vscat.unit = dtype; 731 (*newsf)->vscat.to_n = sf->vscat.to_n; 732 (*newsf)->vscat.from_n = sf->vscat.from_n; 733 /* Do not copy lsf. Build it on demand since it is rarely used */ 734 735 #if defined(PETSC_HAVE_DEVICE) 736 (*newsf)->backend = sf->backend; 737 (*newsf)->unknown_input_stream = sf->unknown_input_stream; 738 (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi; 739 (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi; 740 #endif 741 PetscTryTypeMethod(sf, Duplicate, opt, *newsf); 742 /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */ 743 PetscFunctionReturn(0); 744 } 745 746 /*@C 747 PetscSFGetGraph - Get the graph specifying a parallel star forest 748 749 Not Collective 750 751 Input Parameter: 752 . sf - star forest 753 754 Output Parameters: 755 + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 756 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 757 . ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage) 758 - iremote - remote locations of root vertices for each leaf on the current process 759 760 Level: intermediate 761 762 Notes: 763 We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet 764 765 The returned ilocal/iremote might contain values in different order than the input ones in `PetscSFSetGraph()`, 766 see its manpage for details. 767 768 Fortran Notes: 769 The returned iremote array is a copy and must be deallocated after use. Consequently, if you 770 want to update the graph, you must call `PetscSFSetGraph()` after modifying the iremote array. 
771 772 To check for a NULL ilocal use 773 $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then 774 775 .seealso: `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()` 776 @*/ 777 PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote) 778 { 779 PetscFunctionBegin; 780 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 781 if (sf->ops->GetGraph) { 782 PetscCall((sf->ops->GetGraph)(sf, nroots, nleaves, ilocal, iremote)); 783 } else { 784 if (nroots) *nroots = sf->nroots; 785 if (nleaves) *nleaves = sf->nleaves; 786 if (ilocal) *ilocal = sf->mine; 787 if (iremote) *iremote = sf->remote; 788 } 789 PetscFunctionReturn(0); 790 } 791 792 /*@ 793 PetscSFGetLeafRange - Get the active leaf ranges 794 795 Not Collective 796 797 Input Parameter: 798 . sf - star forest 799 800 Output Parameters: 801 + minleaf - minimum active leaf on this process. Return 0 if there are no leaves. 802 - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves. 803 804 Level: developer 805 806 .seealso: `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 807 @*/ 808 PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf) 809 { 810 PetscFunctionBegin; 811 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 812 PetscSFCheckGraphSet(sf, 1); 813 if (minleaf) *minleaf = sf->minleaf; 814 if (maxleaf) *maxleaf = sf->maxleaf; 815 PetscFunctionReturn(0); 816 } 817 818 /*@C 819 PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database 820 821 Collective on A 822 823 Input Parameters: 824 + A - the star forest 825 . obj - Optional object that provides the prefix for the option names 826 - name - command line option 827 828 Level: intermediate 829 830 .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()` 831 @*/ 832 PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[]) 833 { 834 PetscFunctionBegin; 835 PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1); 836 PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name)); 837 PetscFunctionReturn(0); 838 } 839 840 /*@C 841 PetscSFView - view a star forest 842 843 Collective 844 845 Input Parameters: 846 + sf - star forest 847 - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD` 848 849 Level: beginner 850 851 .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()` 852 @*/ 853 PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer) 854 { 855 PetscBool iascii; 856 PetscViewerFormat format; 857 858 PetscFunctionBegin; 859 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 860 if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer)); 861 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 862 PetscCheckSameComm(sf, 1, viewer, 2); 863 if (sf->graphset) PetscCall(PetscSFSetUp(sf)); 864 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 865 if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) { 866 PetscMPIInt rank; 867 PetscInt ii, i, j; 868 869 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer)); 870 PetscCall(PetscViewerASCIIPushTab(viewer)); 871 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 872 if (!sf->graphset) { 873 PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n")); 874 PetscCall(PetscViewerASCIIPopTab(viewer)); 875 
PetscFunctionReturn(0); 876 } 877 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 878 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 879 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n", rank, sf->nroots, sf->nleaves, sf->nranks)); 880 for (i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index)); 881 PetscCall(PetscViewerFlush(viewer)); 882 PetscCall(PetscViewerGetFormat(viewer, &format)); 883 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 884 PetscMPIInt *tmpranks, *perm; 885 PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm)); 886 PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks)); 887 for (i = 0; i < sf->nranks; i++) perm[i] = i; 888 PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm)); 889 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank)); 890 for (ii = 0; ii < sf->nranks; ii++) { 891 i = perm[ii]; 892 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i])); 893 for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j])); 894 } 895 PetscCall(PetscFree2(tmpranks, perm)); 896 } 897 PetscCall(PetscViewerFlush(viewer)); 898 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 899 } 900 PetscCall(PetscViewerASCIIPopTab(viewer)); 901 } 902 PetscTryTypeMethod(sf, View, viewer); 903 PetscFunctionReturn(0); 904 } 905 906 /*@C 907 PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process 908 909 Not Collective 910 911 Input Parameter: 912 . sf - star forest 913 914 Output Parameters: 915 + nranks - number of ranks referenced by local part 916 . ranks - array of ranks 917 . roffset - offset in rmine/rremote for each rank (length nranks+1) 918 . rmine - concatenated array holding local indices referencing each remote rank 919 - rremote - concatenated array holding remote indices referenced for each remote rank 920 921 Level: developer 922 923 .seealso: `PetscSF`, `PetscSFGetLeafRanks()` 924 @*/ 925 PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote) 926 { 927 PetscFunctionBegin; 928 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 929 PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 930 if (sf->ops->GetRootRanks) { 931 PetscCall((sf->ops->GetRootRanks)(sf, nranks, ranks, roffset, rmine, rremote)); 932 } else { 933 /* The generic implementation */ 934 if (nranks) *nranks = sf->nranks; 935 if (ranks) *ranks = sf->ranks; 936 if (roffset) *roffset = sf->roffset; 937 if (rmine) *rmine = sf->rmine; 938 if (rremote) *rremote = sf->rremote; 939 } 940 PetscFunctionReturn(0); 941 } 942 943 /*@C 944 PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 945 946 Not Collective 947 948 Input Parameter: 949 . sf - star forest 950 951 Output Parameters: 952 + niranks - number of leaf ranks referencing roots on this process 953 . iranks - array of ranks 954 . 
ioffset - offset in irootloc for each rank (length niranks+1) 955 - irootloc - concatenated array holding local indices of roots referenced by each leaf rank 956 957 Level: developer 958 959 .seealso: `PetscSF`, `PetscSFGetRootRanks()` 960 @*/ 961 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc) 962 { 963 PetscFunctionBegin; 964 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 965 PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks"); 966 if (sf->ops->GetLeafRanks) { 967 PetscCall((sf->ops->GetLeafRanks)(sf, niranks, iranks, ioffset, irootloc)); 968 } else { 969 PetscSFType type; 970 PetscCall(PetscSFGetType(sf, &type)); 971 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 972 } 973 PetscFunctionReturn(0); 974 } 975 976 static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list) 977 { 978 PetscInt i; 979 for (i = 0; i < n; i++) { 980 if (needle == list[i]) return PETSC_TRUE; 981 } 982 return PETSC_FALSE; 983 } 984 985 /*@C 986 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations. 987 988 Collective 989 990 Input Parameters: 991 + sf - `PetscSF` to set up; `PetscSFSetGraph()` must have been called 992 - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange) 993 994 Level: developer 995 996 .seealso: `PetscSF`, `PetscSFGetRootRanks()` 997 @*/ 998 PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup) 999 { 1000 PetscHMapI table; 1001 PetscHashIter pos; 1002 PetscMPIInt size, groupsize, *groupranks; 1003 PetscInt *rcount, *ranks; 1004 PetscInt i, irank = -1, orank = -1; 1005 1006 PetscFunctionBegin; 1007 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1008 PetscSFCheckGraphSet(sf, 1); 1009 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size)); 1010 PetscCall(PetscHMapICreateWithSize(10, &table)); 1011 for (i = 0; i < sf->nleaves; i++) { 1012 /* Log 1-based rank */ 1013 PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES)); 1014 } 1015 PetscCall(PetscHMapIGetSize(table, &sf->nranks)); 1016 PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote)); 1017 PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks)); 1018 PetscHashIterBegin(table, pos); 1019 for (i = 0; i < sf->nranks; i++) { 1020 PetscHashIterGetKey(table, pos, ranks[i]); 1021 PetscHashIterGetVal(table, pos, rcount[i]); 1022 PetscHashIterNext(table, pos); 1023 ranks[i]--; /* Convert back to 0-based */ 1024 } 1025 PetscCall(PetscHMapIDestroy(&table)); 1026 1027 /* We expect that dgroup is reliably "small" while nranks could be large */ 1028 { 1029 MPI_Group group = MPI_GROUP_NULL; 1030 PetscMPIInt *dgroupranks; 1031 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 1032 PetscCallMPI(MPI_Group_size(dgroup, &groupsize)); 1033 PetscCall(PetscMalloc1(groupsize, &dgroupranks)); 1034 PetscCall(PetscMalloc1(groupsize, &groupranks)); 1035 for (i = 0; i < groupsize; i++) dgroupranks[i] = i; 1036 if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks)); 1037 PetscCallMPI(MPI_Group_free(&group)); 1038 PetscCall(PetscFree(dgroupranks)); 1039 } 1040 1041 /* Partition ranks[] into distinguished 
(first sf->ndranks) followed by non-distinguished */ 1042 for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) { 1043 for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */ 1044 if (InList(ranks[i], groupsize, groupranks)) break; 1045 } 1046 for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */ 1047 if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break; 1048 } 1049 if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */ 1050 PetscInt tmprank, tmpcount; 1051 1052 tmprank = ranks[i]; 1053 tmpcount = rcount[i]; 1054 ranks[i] = ranks[sf->ndranks]; 1055 rcount[i] = rcount[sf->ndranks]; 1056 ranks[sf->ndranks] = tmprank; 1057 rcount[sf->ndranks] = tmpcount; 1058 sf->ndranks++; 1059 } 1060 } 1061 PetscCall(PetscFree(groupranks)); 1062 PetscCall(PetscSortIntWithArray(sf->ndranks, ranks, rcount)); 1063 PetscCall(PetscSortIntWithArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks)); 1064 sf->roffset[0] = 0; 1065 for (i = 0; i < sf->nranks; i++) { 1066 PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i)); 1067 sf->roffset[i + 1] = sf->roffset[i] + rcount[i]; 1068 rcount[i] = 0; 1069 } 1070 for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) { 1071 /* short circuit */ 1072 if (orank != sf->remote[i].rank) { 1073 /* Search for index of iremote[i].rank in sf->ranks */ 1074 PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->ndranks, sf->ranks, &irank)); 1075 if (irank < 0) { 1076 PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank)); 1077 if (irank >= 0) irank += sf->ndranks; 1078 } 1079 orank = sf->remote[i].rank; 1080 } 1081 PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %" PetscInt_FMT " in array", sf->remote[i].rank); 1082 sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i; 1083 sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index; 1084 rcount[irank]++; 1085 } 1086 PetscCall(PetscFree2(rcount, ranks)); 1087 PetscFunctionReturn(0); 1088 } 1089 1090 /*@C 1091 PetscSFGetGroups - gets incoming and outgoing process groups 1092 1093 Collective 1094 1095 Input Parameter: 1096 . 
sf - star forest 1097 1098 Output Parameters: 1099 + incoming - group of origin processes for incoming edges (leaves that reference my roots) 1100 - outgoing - group of destination processes for outgoing edges (roots that I reference) 1101 1102 Level: developer 1103 1104 .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()` 1105 @*/ 1106 PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing) 1107 { 1108 MPI_Group group = MPI_GROUP_NULL; 1109 1110 PetscFunctionBegin; 1111 PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups"); 1112 if (sf->ingroup == MPI_GROUP_NULL) { 1113 PetscInt i; 1114 const PetscInt *indegree; 1115 PetscMPIInt rank, *outranks, *inranks; 1116 PetscSFNode *remote; 1117 PetscSF bgcount; 1118 1119 /* Compute the number of incoming ranks */ 1120 PetscCall(PetscMalloc1(sf->nranks, &remote)); 1121 for (i = 0; i < sf->nranks; i++) { 1122 remote[i].rank = sf->ranks[i]; 1123 remote[i].index = 0; 1124 } 1125 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount)); 1126 PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER)); 1127 PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree)); 1128 PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree)); 1129 /* Enumerate the incoming ranks */ 1130 PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks)); 1131 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 1132 for (i = 0; i < sf->nranks; i++) outranks[i] = rank; 1133 PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks)); 1134 PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks)); 1135 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 1136 PetscCallMPI(MPI_Group_incl(group, indegree[0], inranks, &sf->ingroup)); 1137 PetscCallMPI(MPI_Group_free(&group)); 1138 PetscCall(PetscFree2(inranks, outranks)); 1139 PetscCall(PetscSFDestroy(&bgcount)); 1140 } 1141 *incoming = sf->ingroup; 1142 1143 if (sf->outgroup == MPI_GROUP_NULL) { 1144 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group)); 1145 PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup)); 1146 PetscCallMPI(MPI_Group_free(&group)); 1147 } 1148 *outgoing = sf->outgroup; 1149 PetscFunctionReturn(0); 1150 } 1151 1152 /*@ 1153 PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters 1154 1155 Collective 1156 1157 Input Parameter: 1158 . sf - star forest that may contain roots with 0 or with more than 1 vertex 1159 1160 Output Parameter: 1161 . multi - star forest with split roots, such that each root has degree exactly 1 1162 1163 Level: developer 1164 1165 Note: 1166 In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi 1167 directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming 1168 edge, it is a candidate for future optimization that might involve its removal. 1169 1170 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()` 1171 @*/ 1172 PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi) 1173 { 1174 PetscFunctionBegin; 1175 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1176 PetscValidPointer(multi, 2); 1177 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? 
*/ 1178 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi)); 1179 *multi = sf->multi; 1180 sf->multi->multi = sf->multi; 1181 PetscFunctionReturn(0); 1182 } 1183 if (!sf->multi) { 1184 const PetscInt *indegree; 1185 PetscInt i, *inoffset, *outones, *outoffset, maxlocal; 1186 PetscSFNode *remote; 1187 maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */ 1188 PetscCall(PetscSFComputeDegreeBegin(sf, &indegree)); 1189 PetscCall(PetscSFComputeDegreeEnd(sf, &indegree)); 1190 PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset)); 1191 inoffset[0] = 0; 1192 for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i]; 1193 for (i = 0; i < maxlocal; i++) outones[i] = 1; 1194 PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM)); 1195 PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM)); 1196 for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1197 if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */ 1198 for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp"); 1199 } 1200 PetscCall(PetscMalloc1(sf->nleaves, &remote)); 1201 for (i = 0; i < sf->nleaves; i++) { 1202 remote[i].rank = sf->remote[i].rank; 1203 remote[i].index = outoffset[sf->mine ? sf->mine[i] : i]; 1204 } 1205 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi)); 1206 sf->multi->multi = sf->multi; 1207 PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER)); 1208 if (sf->rankorder) { /* Sort the ranks */ 1209 PetscMPIInt rank; 1210 PetscInt *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree; 1211 PetscSFNode *newremote; 1212 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 1213 for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]); 1214 PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset)); 1215 for (i = 0; i < maxlocal; i++) outranks[i] = rank; 1216 PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE)); 1217 PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE)); 1218 /* Sort the incoming ranks at each vertex, build the inverse map */ 1219 for (i = 0; i < sf->nroots; i++) { 1220 PetscInt j; 1221 for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j; 1222 PetscCall(PetscSortIntWithArray(indegree[i], inranks + inoffset[i], tmpoffset)); 1223 for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1224 } 1225 PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE)); 1226 PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE)); 1227 PetscCall(PetscMalloc1(sf->nleaves, &newremote)); 1228 for (i = 0; i < sf->nleaves; i++) { 1229 newremote[i].rank = sf->remote[i].rank; 1230 newremote[i].index = newoutoffset[sf->mine ? 
sf->mine[i] : i]; 1231 } 1232 PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER)); 1233 PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset)); 1234 } 1235 PetscCall(PetscFree3(inoffset, outones, outoffset)); 1236 } 1237 *multi = sf->multi; 1238 PetscFunctionReturn(0); 1239 } 1240 1241 /*@C 1242 PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices 1243 1244 Collective 1245 1246 Input Parameters: 1247 + sf - original star forest 1248 . nselected - number of selected roots on this process 1249 - selected - indices of the selected roots on this process 1250 1251 Output Parameter: 1252 . esf - new star forest 1253 1254 Level: advanced 1255 1256 Note: 1257 To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can 1258 be done by calling PetscSFGetGraph(). 1259 1260 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1261 @*/ 1262 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf) 1263 { 1264 PetscInt i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal; 1265 const PetscInt *ilocal; 1266 signed char *rootdata, *leafdata, *leafmem; 1267 const PetscSFNode *iremote; 1268 PetscSFNode *new_iremote; 1269 MPI_Comm comm; 1270 1271 PetscFunctionBegin; 1272 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1273 PetscSFCheckGraphSet(sf, 1); 1274 if (nselected) PetscValidIntPointer(selected, 3); 1275 PetscValidPointer(esf, 4); 1276 1277 PetscCall(PetscSFSetUp(sf)); 1278 PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0)); 1279 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1280 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 1281 1282 if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */ 1283 PetscBool dups; 1284 PetscCall(PetscCheckDupsInt(nselected, selected, &dups)); 1285 PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups"); 1286 for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots); 1287 } 1288 1289 if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf); 1290 else { 1291 /* A generic version of creating embedded sf */ 1292 PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf)); 1293 maxlocal = maxleaf - minleaf + 1; 1294 PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem)); 1295 leafdata = leafmem - minleaf; 1296 /* Tag selected roots and bcast to leaves */ 1297 for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1; 1298 PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1299 PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE)); 1300 1301 /* Build esf with leaves that are still connected */ 1302 esf_nleaves = 0; 1303 for (i = 0; i < nleaves; i++) { 1304 j = ilocal ? ilocal[i] : i; 1305 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1306 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1307 */ 1308 esf_nleaves += (leafdata[j] ? 
1 : 0); 1309 } 1310 PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal)); 1311 PetscCall(PetscMalloc1(esf_nleaves, &new_iremote)); 1312 for (i = n = 0; i < nleaves; i++) { 1313 j = ilocal ? ilocal[i] : i; 1314 if (leafdata[j]) { 1315 new_ilocal[n] = j; 1316 new_iremote[n].rank = iremote[i].rank; 1317 new_iremote[n].index = iremote[i].index; 1318 ++n; 1319 } 1320 } 1321 PetscCall(PetscSFCreate(comm, esf)); 1322 PetscCall(PetscSFSetFromOptions(*esf)); 1323 PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1324 PetscCall(PetscFree2(rootdata, leafmem)); 1325 } 1326 PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0)); 1327 PetscFunctionReturn(0); 1328 } 1329 1330 /*@C 1331 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1332 1333 Collective 1334 1335 Input Parameters: 1336 + sf - original star forest 1337 . nselected - number of selected leaves on this process 1338 - selected - indices of the selected leaves on this process 1339 1340 Output Parameter: 1341 . newsf - new star forest 1342 1343 Level: advanced 1344 1345 .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1346 @*/ 1347 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf) 1348 { 1349 const PetscSFNode *iremote; 1350 PetscSFNode *new_iremote; 1351 const PetscInt *ilocal; 1352 PetscInt i, nroots, *leaves, *new_ilocal; 1353 MPI_Comm comm; 1354 1355 PetscFunctionBegin; 1356 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1357 PetscSFCheckGraphSet(sf, 1); 1358 if (nselected) PetscValidIntPointer(selected, 3); 1359 PetscValidPointer(newsf, 4); 1360 1361 /* Uniq selected[] and put results in leaves[] */ 1362 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1363 PetscCall(PetscMalloc1(nselected, &leaves)); 1364 PetscCall(PetscArraycpy(leaves, selected, nselected)); 1365 PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves)); 1366 PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves); 1367 1368 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1369 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf); 1370 else { 1371 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote)); 1372 PetscCall(PetscMalloc1(nselected, &new_ilocal)); 1373 PetscCall(PetscMalloc1(nselected, &new_iremote)); 1374 for (i = 0; i < nselected; ++i) { 1375 const PetscInt l = leaves[i]; 1376 new_ilocal[i] = ilocal ? ilocal[l] : l; 1377 new_iremote[i].rank = iremote[l].rank; 1378 new_iremote[i].index = iremote[l].index; 1379 } 1380 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf)); 1381 PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 1382 } 1383 PetscCall(PetscFree(leaves)); 1384 PetscFunctionReturn(0); 1385 } 1386 1387 /*@C 1388 PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()` 1389 1390 Collective 1391 1392 Input Parameters: 1393 + sf - star forest on which to communicate 1394 . unit - data type associated with each node 1395 . 
rootdata - buffer to broadcast 1396 - op - operation to use for reduction 1397 1398 Output Parameter: 1399 . leafdata - buffer to be reduced with values from each leaf's respective root 1400 1401 Level: intermediate 1402 1403 Notes: 1404 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1405 are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1406 use `PetscSFBcastWithMemTypeBegin()` instead. 1407 1408 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()` 1409 @*/ 1410 PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1411 { 1412 PetscMemType rootmtype, leafmtype; 1413 1414 PetscFunctionBegin; 1415 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1416 PetscCall(PetscSFSetUp(sf)); 1417 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1418 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1419 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1420 PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 1421 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1422 PetscFunctionReturn(0); 1423 } 1424 1425 /*@C 1426 PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to `PetscSFBcastEnd()` 1427 1428 Collective 1429 1430 Input Parameters: 1431 + sf - star forest on which to communicate 1432 . unit - data type associated with each node 1433 . rootmtype - memory type of rootdata 1434 . rootdata - buffer to broadcast 1435 . leafmtype - memory type of leafdata 1436 - op - operation to use for reduction 1437 1438 Output Parameter: 1439 . leafdata - buffer to be reduced with values from each leaf's respective root 1440 1441 Level: intermediate 1442 1443 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()` 1444 @*/ 1445 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) 1446 { 1447 PetscFunctionBegin; 1448 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1449 PetscCall(PetscSFSetUp(sf)); 1450 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1451 PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op); 1452 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 1453 PetscFunctionReturn(0); 1454 } 1455 1456 /*@C 1457 PetscSFBcastEnd - end a broadcast & reduce operation started with `PetscSFBcastBegin()` 1458 1459 Collective 1460 1461 Input Parameters: 1462 + sf - star forest 1463 . unit - data type 1464 . rootdata - buffer to broadcast 1465 - op - operation to use for reduction 1466 1467 Output Parameter: 1468 . 
leafdata - buffer to be reduced with values from each leaf's respective root 1469 1470 Level: intermediate 1471 1472 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()` 1473 @*/ 1474 PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1475 { 1476 PetscFunctionBegin; 1477 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1478 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1479 PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op); 1480 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0)); 1481 PetscFunctionReturn(0); 1482 } 1483 1484 /*@C 1485 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()` 1486 1487 Collective 1488 1489 Input Parameters: 1490 + sf - star forest 1491 . unit - data type 1492 . leafdata - values to reduce 1493 - op - reduction operation 1494 1495 Output Parameter: 1496 . rootdata - result of reduction of values from all leaves of each root 1497 1498 Level: intermediate 1499 1500 Notes: 1501 When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1502 are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should 1503 use `PetscSFReduceWithMemTypeBegin()` instead. 1504 1505 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()` 1506 @*/ 1507 PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1508 { 1509 PetscMemType rootmtype, leafmtype; 1510 1511 PetscFunctionBegin; 1512 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1513 PetscCall(PetscSFSetUp(sf)); 1514 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1515 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1516 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1517 PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1518 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1519 PetscFunctionReturn(0); 1520 } 1521 1522 /*@C 1523 PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()` 1524 1525 Collective 1526 1527 Input Parameters: 1528 + sf - star forest 1529 . unit - data type 1530 . leafmtype - memory type of leafdata 1531 . leafdata - values to reduce 1532 . rootmtype - memory type of rootdata 1533 - op - reduction operation 1534 1535 Output Parameter: 1536 . 
rootdata - result of reduction of values from all leaves of each root 1537 1538 Level: intermediate 1539 1540 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()` 1541 @*/ 1542 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) 1543 { 1544 PetscFunctionBegin; 1545 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1546 PetscCall(PetscSFSetUp(sf)); 1547 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1548 PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op)); 1549 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0)); 1550 PetscFunctionReturn(0); 1551 } 1552 1553 /*@C 1554 PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` 1555 1556 Collective 1557 1558 Input Parameters: 1559 + sf - star forest 1560 . unit - data type 1561 . leafdata - values to reduce 1562 - op - reduction operation 1563 1564 Output Parameter: 1565 . rootdata - result of reduction of values from all leaves of each root 1566 1567 Level: intermediate 1568 1569 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()` 1570 @*/ 1571 PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1572 { 1573 PetscFunctionBegin; 1574 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1575 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1576 PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op); 1577 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0)); 1578 PetscFunctionReturn(0); 1579 } 1580 1581 /*@C 1582 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, 1583 to be completed with `PetscSFFetchAndOpEnd()` 1584 1585 Collective 1586 1587 Input Parameters: 1588 + sf - star forest 1589 . unit - data type 1590 . leafdata - leaf values to use in reduction 1591 - op - operation to use for reduction 1592 1593 Output Parameters: 1594 + rootdata - root values to be updated, input state is seen by first process to perform an update 1595 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1596 1597 Level: advanced 1598 1599 Note: 1600 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1601 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1602 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1603 integers. 
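   Example:
   A minimal usage sketch, added here for illustration (the buffer names are placeholders and their allocation and filling are assumed to be done by the caller). Each leaf contributes its value to its root with MPI_SUM and receives, in leafupdate, the root state seen immediately before its own update was applied:
.vb
   PetscInt *rootdata, *leafdata, *leafupdate; // caller-allocated arrays sized for the roots and leaves of sf
   PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, rootdata, leafdata, leafupdate, MPI_SUM));
   PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, rootdata, leafdata, leafupdate, MPI_SUM));
   // rootdata now holds the reduced values; leafupdate holds the pre-update root values
.ve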
1604 1605 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()` 1606 @*/ 1607 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1608 { 1609 PetscMemType rootmtype, leafmtype, leafupdatemtype; 1610 1611 PetscFunctionBegin; 1612 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1613 PetscCall(PetscSFSetUp(sf)); 1614 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1615 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 1616 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 1617 PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype)); 1618 PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1619 PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 1620 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1621 PetscFunctionReturn(0); 1622 } 1623 1624 /*@C 1625 PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by 1626 applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()` 1627 1628 Collective 1629 1630 Input Parameters: 1631 + sf - star forest 1632 . unit - data type 1633 . rootmtype - memory type of rootdata 1634 . leafmtype - memory type of leafdata 1635 . leafdata - leaf values to use in reduction 1636 . leafupdatemtype - memory type of leafupdate 1637 - op - operation to use for reduction 1638 1639 Output Parameters: 1640 + rootdata - root values to be updated, input state is seen by first process to perform an update 1641 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1642 1643 Level: advanced 1644 1645 Note: 1646 See `PetscSFFetchAndOpBegin()` for more details. 1647 1648 .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()` 1649 @*/ 1650 PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op) 1651 { 1652 PetscFunctionBegin; 1653 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1654 PetscCall(PetscSFSetUp(sf)); 1655 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1656 PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types"); 1657 PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op); 1658 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0)); 1659 PetscFunctionReturn(0); 1660 } 1661 1662 /*@C 1663 PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value 1664 1665 Collective 1666 1667 Input Parameters: 1668 + sf - star forest 1669 . unit - data type 1670 . 
leafdata - leaf values to use in reduction 1671 - op - operation to use for reduction 1672 1673 Output Parameters: 1674 + rootdata - root values to be updated, input state is seen by first process to perform an update 1675 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1676 1677 Level: advanced 1678 1679 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()` 1680 @*/ 1681 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1682 { 1683 PetscFunctionBegin; 1684 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1685 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0)); 1686 PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op); 1687 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0)); 1688 PetscFunctionReturn(0); 1689 } 1690 1691 /*@C 1692 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()` 1693 1694 Collective 1695 1696 Input Parameter: 1697 . sf - star forest 1698 1699 Output Parameter: 1700 . degree - degree of each root vertex 1701 1702 Level: advanced 1703 1704 Note: 1705 The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence no need to call `PetscFree()` on it. 1706 1707 .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()` 1708 @*/ 1709 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt **degree) 1710 { 1711 PetscFunctionBegin; 1712 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1713 PetscSFCheckGraphSet(sf, 1); 1714 PetscValidPointer(degree, 2); 1715 if (!sf->degreeknown) { 1716 PetscInt i, nroots = sf->nroots, maxlocal; 1717 PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1718 maxlocal = sf->maxleaf - sf->minleaf + 1; 1719 PetscCall(PetscMalloc1(nroots, &sf->degree)); 1720 PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1721 for (i = 0; i < nroots; i++) sf->degree[i] = 0; 1722 for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1; 1723 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 1724 } 1725 *degree = NULL; 1726 PetscFunctionReturn(0); 1727 } 1728 1729 /*@C 1730 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()` 1731 1732 Collective 1733 1734 Input Parameter: 1735 . sf - star forest 1736 1737 Output Parameter: 1738 . degree - degree of each root vertex 1739 1740 Level: developer 1741 1742 Note: 1743 The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence no need to call `PetscFree()` on it. 
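   Example:
   A minimal usage sketch (illustrative only) of the split-phase degree computation, which allows unrelated work to overlap the communication:
.vb
   const PetscInt *degree;
   PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
   // ... unrelated work may be done here ...
   PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
   // degree[i] is the number of leaves attached to root i; the array is owned by sf and must not be freed
.ve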
1744 1745 .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()` 1746 @*/ 1747 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree) 1748 { 1749 PetscFunctionBegin; 1750 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1751 PetscSFCheckGraphSet(sf, 1); 1752 PetscValidPointer(degree, 2); 1753 if (!sf->degreeknown) { 1754 PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 1755 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM)); 1756 PetscCall(PetscFree(sf->degreetmp)); 1757 sf->degreeknown = PETSC_TRUE; 1758 } 1759 *degree = sf->degree; 1760 PetscFunctionReturn(0); 1761 } 1762 1763 /*@C 1764 PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by `PetscSFGetMultiSF()`). 1765 Each multi-root is assigned index of the corresponding original root. 1766 1767 Collective 1768 1769 Input Parameters: 1770 + sf - star forest 1771 - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()` 1772 1773 Output Parameters: 1774 + nMultiRoots - (optional) number of multi-roots (roots of multi-SF) 1775 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots 1776 1777 Level: developer 1778 1779 Note: 1780 The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with `PetscFree()` when no longer needed. 1781 1782 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()` 1783 @*/ 1784 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1785 { 1786 PetscSF msf; 1787 PetscInt i, j, k, nroots, nmroots; 1788 1789 PetscFunctionBegin; 1790 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1791 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL)); 1792 if (nroots) PetscValidIntPointer(degree, 2); 1793 if (nMultiRoots) PetscValidIntPointer(nMultiRoots, 3); 1794 PetscValidPointer(multiRootsOrigNumbering, 4); 1795 PetscCall(PetscSFGetMultiSF(sf, &msf)); 1796 PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL)); 1797 PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering)); 1798 for (i = 0, j = 0, k = 0; i < nroots; i++) { 1799 if (!degree[i]) continue; 1800 for (j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i; 1801 } 1802 PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail"); 1803 if (nMultiRoots) *nMultiRoots = nmroots; 1804 PetscFunctionReturn(0); 1805 } 1806 1807 /*@C 1808 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()` 1809 1810 Collective 1811 1812 Input Parameters: 1813 + sf - star forest 1814 . unit - data type 1815 - leafdata - leaf data to gather to roots 1816 1817 Output Parameter: 1818 . 
multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1819 1820 Level: intermediate 1821 1822 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()` 1823 @*/ 1824 PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) 1825 { 1826 PetscSF multi = NULL; 1827 1828 PetscFunctionBegin; 1829 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1830 PetscCall(PetscSFSetUp(sf)); 1831 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1832 PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 1833 PetscFunctionReturn(0); 1834 } 1835 1836 /*@C 1837 PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()` 1838 1839 Collective 1840 1841 Input Parameters: 1842 + sf - star forest 1843 . unit - data type 1844 - leafdata - leaf data to gather to roots 1845 1846 Output Parameter: 1847 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1848 1849 Level: intermediate 1850 1851 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()` 1852 @*/ 1853 PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) 1854 { 1855 PetscSF multi = NULL; 1856 1857 PetscFunctionBegin; 1858 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1859 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1860 PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE)); 1861 PetscFunctionReturn(0); 1862 } 1863 1864 /*@C 1865 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()` 1866 1867 Collective 1868 1869 Input Parameters: 1870 + sf - star forest 1871 . unit - data type 1872 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1873 1874 Output Parameter: 1875 . leafdata - leaf data to be updated with personal data from each respective root 1876 1877 Level: intermediate 1878 1879 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()` 1880 @*/ 1881 PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) 1882 { 1883 PetscSF multi = NULL; 1884 1885 PetscFunctionBegin; 1886 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1887 PetscCall(PetscSFSetUp(sf)); 1888 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1889 PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE)); 1890 PetscFunctionReturn(0); 1891 } 1892 1893 /*@C 1894 PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()` 1895 1896 Collective 1897 1898 Input Parameters: 1899 + sf - star forest 1900 . unit - data type 1901 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1902 1903 Output Parameter: 1904 . 
leafdata - leaf data to be updated with personal data from each respective root 1905 1906 Level: intermediate 1907 1908 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()` 1909 @*/ 1910 PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) 1911 { 1912 PetscSF multi = NULL; 1913 1914 PetscFunctionBegin; 1915 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 1916 PetscCall(PetscSFGetMultiSF(sf, &multi)); 1917 PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE)); 1918 PetscFunctionReturn(0); 1919 } 1920 1921 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 1922 { 1923 PetscInt i, n, nleaves; 1924 const PetscInt *ilocal = NULL; 1925 PetscHSetI seen; 1926 1927 PetscFunctionBegin; 1928 if (PetscDefined(USE_DEBUG)) { 1929 PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL)); 1930 PetscCall(PetscHSetICreate(&seen)); 1931 for (i = 0; i < nleaves; i++) { 1932 const PetscInt leaf = ilocal ? ilocal[i] : i; 1933 PetscCall(PetscHSetIAdd(seen, leaf)); 1934 } 1935 PetscCall(PetscHSetIGetSize(seen, &n)); 1936 PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique"); 1937 PetscCall(PetscHSetIDestroy(&seen)); 1938 } 1939 PetscFunctionReturn(0); 1940 } 1941 1942 /*@ 1943 PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view 1944 1945 Input Parameters: 1946 + sfA - The first `PetscSF` 1947 - sfB - The second `PetscSF` 1948 1949 Output Parameters: 1950 . sfBA - The composite `PetscSF` 1951 1952 Level: developer 1953 1954 Notes: 1955 Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star 1956 forests, i.e. the same leaf is not connected with different roots. 1957 1958 sfA's leaf space and sfB's root space may partially overlap. The composition builds 1959 a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected 1960 nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a 1961 Bcast on sfA, then a Bcast on sfB, on connected nodes. 1962 1963 .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()` 1964 @*/ 1965 PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA) 1966 { 1967 const PetscSFNode *remotePointsA, *remotePointsB; 1968 PetscSFNode *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB; 1969 const PetscInt *localPointsA, *localPointsB; 1970 PetscInt *localPointsBA; 1971 PetscInt i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA; 1972 PetscBool denseB; 1973 1974 PetscFunctionBegin; 1975 PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1); 1976 PetscSFCheckGraphSet(sfA, 1); 1977 PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2); 1978 PetscSFCheckGraphSet(sfB, 2); 1979 PetscCheckSameComm(sfA, 1, sfB, 2); 1980 PetscValidPointer(sfBA, 3); 1981 PetscCall(PetscSFCheckLeavesUnique_Private(sfA)); 1982 PetscCall(PetscSFCheckLeavesUnique_Private(sfB)); 1983 1984 PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA)); 1985 PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB)); 1986 /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size */ 1987 /* numRootsB; otherwise, garbage will be broadcast. 
*/ 1988 /* Example (comm size = 1): */ 1989 /* sfA: 0 <- (0, 0) */ 1990 /* sfB: 100 <- (0, 0) */ 1991 /* 101 <- (0, 1) */ 1992 /* Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target */ 1993 /* of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would */ 1994 /* receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on */ 1995 /* remotePointsA; if not recast, point 101 would receive a garbage value. */ 1996 PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA)); 1997 for (i = 0; i < numRootsB; i++) { 1998 reorderedRemotePointsA[i].rank = -1; 1999 reorderedRemotePointsA[i].index = -1; 2000 } 2001 for (i = 0; i < numLeavesA; i++) { 2002 PetscInt localp = localPointsA ? localPointsA[i] : i; 2003 2004 if (localp >= numRootsB) continue; 2005 reorderedRemotePointsA[localp] = remotePointsA[i]; 2006 } 2007 remotePointsA = reorderedRemotePointsA; 2008 PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf)); 2009 PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB)); 2010 for (i = 0; i < maxleaf - minleaf + 1; i++) { 2011 leafdataB[i].rank = -1; 2012 leafdataB[i].index = -1; 2013 } 2014 PetscCall(PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE)); 2015 PetscCall(PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE)); 2016 PetscCall(PetscFree(reorderedRemotePointsA)); 2017 2018 denseB = (PetscBool)!localPointsB; 2019 for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) { 2020 if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE; 2021 else numLeavesBA++; 2022 } 2023 if (denseB) { 2024 localPointsBA = NULL; 2025 remotePointsBA = leafdataB; 2026 } else { 2027 PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA)); 2028 PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA)); 2029 for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) { 2030 const PetscInt l = localPointsB ? localPointsB[i] : i; 2031 2032 if (leafdataB[l - minleaf].rank == -1) continue; 2033 remotePointsBA[numLeavesBA] = leafdataB[l - minleaf]; 2034 localPointsBA[numLeavesBA] = l; 2035 numLeavesBA++; 2036 } 2037 PetscCall(PetscFree(leafdataB)); 2038 } 2039 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA)); 2040 PetscCall(PetscSFSetFromOptions(*sfBA)); 2041 PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER)); 2042 PetscFunctionReturn(0); 2043 } 2044 2045 /*@ 2046 PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one 2047 2048 Input Parameters: 2049 + sfA - The first `PetscSF` 2050 - sfB - The second `PetscSF` 2051 2052 Output Parameters: 2053 . sfBA - The composite `PetscSF`. 2054 2055 Level: developer 2056 2057 Notes: 2058 Currently, the two SFs must be defined on congruent communicators and they must be true star 2059 forests, i.e. the same leaf is not connected with different roots. Moreover, all roots of the 2060 second SF must have a degree of 1, i.e., no roots have more than one leaf connected. 2061 2062 sfA's leaf space and sfB's leaf space may partially overlap. The composition builds 2063 a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected 2064 roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then 2065 a Reduce on sfB, on connected roots. 
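   Example:
   A minimal sketch (illustrative; sfA and sfB are assumed to satisfy the conditions above):
.vb
   PetscSF sfBA;
   PetscCall(PetscSFComposeInverse(sfA, sfB, &sfBA));
   // a Bcast on sfBA behaves like a Bcast on sfA followed by a Reduce on sfB, restricted to connected roots
   PetscCall(PetscSFDestroy(&sfBA));
.ve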
2066 2067 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()` 2068 @*/ 2069 PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA) 2070 { 2071 const PetscSFNode *remotePointsA, *remotePointsB; 2072 PetscSFNode *remotePointsBA; 2073 const PetscInt *localPointsA, *localPointsB; 2074 PetscSFNode *reorderedRemotePointsA = NULL; 2075 PetscInt i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA; 2076 MPI_Op op; 2077 #if defined(PETSC_USE_64BIT_INDICES) 2078 PetscBool iswin; 2079 #endif 2080 2081 PetscFunctionBegin; 2082 PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1); 2083 PetscSFCheckGraphSet(sfA, 1); 2084 PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2); 2085 PetscSFCheckGraphSet(sfB, 2); 2086 PetscCheckSameComm(sfA, 1, sfB, 2); 2087 PetscValidPointer(sfBA, 3); 2088 PetscCall(PetscSFCheckLeavesUnique_Private(sfA)); 2089 PetscCall(PetscSFCheckLeavesUnique_Private(sfB)); 2090 2091 PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA)); 2092 PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB)); 2093 2094 /* TODO: Check roots of sfB have degree of 1 */ 2095 /* Once we implement it, we can replace the MPI_MAXLOC 2096 with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect. 2097 We use MPI_MAXLOC only to have a deterministic output from this routine if 2098 the root condition is not met. 2099 */ 2100 op = MPI_MAXLOC; 2101 #if defined(PETSC_USE_64BIT_INDICES) 2102 /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */ 2103 PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin)); 2104 if (iswin) op = MPI_REPLACE; 2105 #endif 2106 2107 PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf)); 2108 PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA)); 2109 for (i = 0; i < maxleaf - minleaf + 1; i++) { 2110 reorderedRemotePointsA[i].rank = -1; 2111 reorderedRemotePointsA[i].index = -1; 2112 } 2113 if (localPointsA) { 2114 for (i = 0; i < numLeavesA; i++) { 2115 if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; 2116 reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i]; 2117 } 2118 } else { 2119 for (i = 0; i < numLeavesA; i++) { 2120 if (i > maxleaf || i < minleaf) continue; 2121 reorderedRemotePointsA[i - minleaf] = remotePointsA[i]; 2122 } 2123 } 2124 2125 PetscCall(PetscMalloc1(numRootsB, &localPointsBA)); 2126 PetscCall(PetscMalloc1(numRootsB, &remotePointsBA)); 2127 for (i = 0; i < numRootsB; i++) { 2128 remotePointsBA[i].rank = -1; 2129 remotePointsBA[i].index = -1; 2130 } 2131 2132 PetscCall(PetscSFReduceBegin(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op)); 2133 PetscCall(PetscSFReduceEnd(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op)); 2134 PetscCall(PetscFree(reorderedRemotePointsA)); 2135 for (i = 0, numLeavesBA = 0; i < numRootsB; i++) { 2136 if (remotePointsBA[i].rank == -1) continue; 2137 remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank; 2138 remotePointsBA[numLeavesBA].index = remotePointsBA[i].index; 2139 localPointsBA[numLeavesBA] = i; 2140 numLeavesBA++; 2141 } 2142 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA)); 2143 PetscCall(PetscSFSetFromOptions(*sfBA)); 2144 PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, 
PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER)); 2145 PetscFunctionReturn(0); 2146 } 2147 2148 /* 2149 PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF` 2150 2151 Input Parameters: 2152 . sf - The global `PetscSF` 2153 2154 Output Parameters: 2155 . out - The local `PetscSF` 2156 2157 .seealso: `PetscSF`, `PetscSFCreate()` 2158 */ 2159 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out) 2160 { 2161 MPI_Comm comm; 2162 PetscMPIInt myrank; 2163 const PetscInt *ilocal; 2164 const PetscSFNode *iremote; 2165 PetscInt i, j, nroots, nleaves, lnleaves, *lilocal; 2166 PetscSFNode *liremote; 2167 PetscSF lsf; 2168 2169 PetscFunctionBegin; 2170 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2171 if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out); 2172 else { 2173 /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */ 2174 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 2175 PetscCallMPI(MPI_Comm_rank(comm, &myrank)); 2176 2177 /* Find out local edges and build a local SF */ 2178 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote)); 2179 for (i = lnleaves = 0; i < nleaves; i++) { 2180 if (iremote[i].rank == (PetscInt)myrank) lnleaves++; 2181 } 2182 PetscCall(PetscMalloc1(lnleaves, &lilocal)); 2183 PetscCall(PetscMalloc1(lnleaves, &liremote)); 2184 2185 for (i = j = 0; i < nleaves; i++) { 2186 if (iremote[i].rank == (PetscInt)myrank) { 2187 lilocal[j] = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */ 2188 liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */ 2189 liremote[j].index = iremote[i].index; 2190 j++; 2191 } 2192 } 2193 PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf)); 2194 PetscCall(PetscSFSetFromOptions(lsf)); 2195 PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER)); 2196 PetscCall(PetscSFSetUp(lsf)); 2197 *out = lsf; 2198 } 2199 PetscFunctionReturn(0); 2200 } 2201 2202 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */ 2203 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata) 2204 { 2205 PetscMemType rootmtype, leafmtype; 2206 2207 PetscFunctionBegin; 2208 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 2209 PetscCall(PetscSFSetUp(sf)); 2210 PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0)); 2211 PetscCall(PetscGetMemType(rootdata, &rootmtype)); 2212 PetscCall(PetscGetMemType(leafdata, &leafmtype)); 2213 PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata); 2214 PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0)); 2215 PetscFunctionReturn(0); 2216 } 2217 2218 /*@ 2219 PetscSFConcatenate - concatenate multiple `PetscSF` into one 2220 2221 Input Parameters: 2222 + comm - the communicator 2223 . nsfs - the number of input `PetscSF` 2224 . sfs - the array of input `PetscSF` 2225 . rootMode - the root mode specifying how roots are handled 2226 - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or NULL for contiguous storage 2227 2228 Output Parameters: 2229 . newsf - The resulting `PetscSF` 2230 2231 Level: advanced 2232 2233 Notes: 2234 The communicator of all SFs in sfs must be comm. 2235 2236 Leaves are always concatenated locally, keeping them ordered by the input SF index and original local order. 2237 The offsets in leafOffsets are added to the original leaf indices. 
2238 If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well. 2239 In this case, NULL is also passed as ilocal to the resulting SF. 2240 If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs. 2241 In this case, the user is responsible for providing correct offsets so that the resulting leaves are unique (otherwise an error occurs). 2242 2243 All root modes retain the essential connectivity condition: 2244 If two leaves of the same input SF are connected (sharing the same root), they are also connected in the output SF. 2245 Parameter rootMode controls how the input root spaces are combined. 2246 For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input SF (checked in debug mode) and is also the same in the output SF. 2247 For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined. 2248 `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally; 2249 roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input SF and original local index, and renumbered contiguously. 2250 `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally; 2251 roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input SF index and original global index, and renumbered contiguously; 2252 the original root ranks are ignored. 2253 For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, 2254 the output SF's root layout is such that, on each rank, the local number of roots is the sum of the input SFs' local numbers of roots, preserving the load balance. 2255 However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, that means roots can move to different ranks. 2256 2257 Example: 2258 We can use src/vec/is/sf/tests/ex18.c to compare the root modes. 
By running 2259 $ make -C $PETSC_DIR/src/vec/is/sf/tests ex18 2260 $ for m in {local,global,shared}; do 2261 $ mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view 2262 $ done 2263 we generate two identical SFs sf_0 and sf_1, 2264 $ PetscSF Object: sf_0 2 MPI processes 2265 $ type: basic 2266 $ rank #leaves #roots 2267 $ [ 0] 4 2 2268 $ [ 1] 4 2 2269 $ leaves roots roots in global numbering 2270 $ ( 0, 0) <- ( 0, 0) = 0 2271 $ ( 0, 1) <- ( 0, 1) = 1 2272 $ ( 0, 2) <- ( 1, 0) = 2 2273 $ ( 0, 3) <- ( 1, 1) = 3 2274 $ ( 1, 0) <- ( 0, 0) = 0 2275 $ ( 1, 1) <- ( 0, 1) = 1 2276 $ ( 1, 2) <- ( 1, 0) = 2 2277 $ ( 1, 3) <- ( 1, 1) = 3 2278 and pass them to `PetscSFConcatenate()` along with different choices of rootMode, yielding different result_sf: 2279 $ rootMode = local: 2280 $ PetscSF Object: result_sf 2 MPI processes 2281 $ type: basic 2282 $ rank #leaves #roots 2283 $ [ 0] 8 4 2284 $ [ 1] 8 4 2285 $ leaves roots roots in global numbering 2286 $ ( 0, 0) <- ( 0, 0) = 0 2287 $ ( 0, 1) <- ( 0, 1) = 1 2288 $ ( 0, 2) <- ( 1, 0) = 4 2289 $ ( 0, 3) <- ( 1, 1) = 5 2290 $ ( 0, 4) <- ( 0, 2) = 2 2291 $ ( 0, 5) <- ( 0, 3) = 3 2292 $ ( 0, 6) <- ( 1, 2) = 6 2293 $ ( 0, 7) <- ( 1, 3) = 7 2294 $ ( 1, 0) <- ( 0, 0) = 0 2295 $ ( 1, 1) <- ( 0, 1) = 1 2296 $ ( 1, 2) <- ( 1, 0) = 4 2297 $ ( 1, 3) <- ( 1, 1) = 5 2298 $ ( 1, 4) <- ( 0, 2) = 2 2299 $ ( 1, 5) <- ( 0, 3) = 3 2300 $ ( 1, 6) <- ( 1, 2) = 6 2301 $ ( 1, 7) <- ( 1, 3) = 7 2302 $ 2303 $ rootMode = global: 2304 $ PetscSF Object: result_sf 2 MPI processes 2305 $ type: basic 2306 $ rank #leaves #roots 2307 $ [ 0] 8 4 2308 $ [ 1] 8 4 2309 $ leaves roots roots in global numbering 2310 $ ( 0, 0) <- ( 0, 0) = 0 2311 $ ( 0, 1) <- ( 0, 1) = 1 2312 $ ( 0, 2) <- ( 0, 2) = 2 2313 $ ( 0, 3) <- ( 0, 3) = 3 2314 $ ( 0, 4) <- ( 1, 0) = 4 2315 $ ( 0, 5) <- ( 1, 1) = 5 2316 $ ( 0, 6) <- ( 1, 2) = 6 2317 $ ( 0, 7) <- ( 1, 3) = 7 2318 $ ( 1, 0) <- ( 0, 0) = 0 2319 $ ( 1, 1) <- ( 0, 1) = 1 2320 $ ( 1, 2) <- ( 0, 2) = 2 2321 $ ( 1, 3) <- ( 0, 3) = 3 2322 $ ( 1, 4) <- ( 1, 0) = 4 2323 $ ( 1, 5) <- ( 1, 1) = 5 2324 $ ( 1, 6) <- ( 1, 2) = 6 2325 $ ( 1, 7) <- ( 1, 3) = 7 2326 $ 2327 $ rootMode = shared: 2328 $ PetscSF Object: result_sf 2 MPI processes 2329 $ type: basic 2330 $ rank #leaves #roots 2331 $ [ 0] 8 2 2332 $ [ 1] 8 2 2333 $ leaves roots roots in global numbering 2334 $ ( 0, 0) <- ( 0, 0) = 0 2335 $ ( 0, 1) <- ( 0, 1) = 1 2336 $ ( 0, 2) <- ( 1, 0) = 2 2337 $ ( 0, 3) <- ( 1, 1) = 3 2338 $ ( 0, 4) <- ( 0, 0) = 0 2339 $ ( 0, 5) <- ( 0, 1) = 1 2340 $ ( 0, 6) <- ( 1, 0) = 2 2341 $ ( 0, 7) <- ( 1, 1) = 3 2342 $ ( 1, 0) <- ( 0, 0) = 0 2343 $ ( 1, 1) <- ( 0, 1) = 1 2344 $ ( 1, 2) <- ( 1, 0) = 2 2345 $ ( 1, 3) <- ( 1, 1) = 3 2346 $ ( 1, 4) <- ( 0, 0) = 0 2347 $ ( 1, 5) <- ( 0, 1) = 1 2348 $ ( 1, 6) <- ( 1, 0) = 2 2349 $ ( 1, 7) <- ( 1, 1) = 3 2350 2351 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode` 2352 @*/ 2353 PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf) 2354 { 2355 PetscInt i, s, nLeaves, nRoots; 2356 PetscInt *leafArrayOffsets; 2357 PetscInt *ilocal_new; 2358 PetscSFNode *iremote_new; 2359 PetscBool all_ilocal_null = PETSC_FALSE; 2360 PetscLayout glayout = NULL; 2361 PetscInt *gremote = NULL; 2362 PetscMPIInt rank, size; 2363 2364 PetscFunctionBegin; 2365 if (PetscDefined(USE_DEBUG)) { 2366 PetscSF dummy; /* just to have a PetscObject on comm for input validation */ 2367 
2368 PetscCall(PetscSFCreate(comm, &dummy)); 2369 PetscValidLogicalCollectiveInt(dummy, nsfs, 2); 2370 PetscValidPointer(sfs, 3); 2371 for (i = 0; i < nsfs; i++) { 2372 PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3); 2373 PetscCheckSameComm(dummy, 1, sfs[i], 3); 2374 } 2375 PetscValidLogicalCollectiveEnum(dummy, rootMode, 4); 2376 if (leafOffsets) PetscValidIntPointer(leafOffsets, 5); 2377 PetscValidPointer(newsf, 6); 2378 PetscCall(PetscSFDestroy(&dummy)); 2379 } 2380 if (!nsfs) { 2381 PetscCall(PetscSFCreate(comm, newsf)); 2382 PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 2383 PetscFunctionReturn(0); 2384 } 2385 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2386 PetscCallMPI(MPI_Comm_size(comm, &size)); 2387 2388 /* Calculate leaf array offsets */ 2389 PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets)); 2390 leafArrayOffsets[0] = 0; 2391 for (s = 0; s < nsfs; s++) { 2392 PetscInt nl; 2393 2394 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL)); 2395 leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl; 2396 } 2397 nLeaves = leafArrayOffsets[nsfs]; 2398 2399 /* Calculate number of roots */ 2400 switch (rootMode) { 2401 case PETSCSF_CONCATENATE_ROOTMODE_SHARED: { 2402 PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL)); 2403 if (PetscDefined(USE_DEBUG)) { 2404 for (s = 1; s < nsfs; s++) { 2405 PetscInt nr; 2406 2407 PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL)); 2408 PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots); 2409 } 2410 } 2411 } break; 2412 case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: { 2413 /* Calculate also global layout in this case */ 2414 PetscInt *nls; 2415 PetscLayout *lts; 2416 PetscInt **inds; 2417 PetscInt j; 2418 PetscInt rootOffset = 0; 2419 2420 PetscCall(PetscCalloc3(nsfs, &lts, nsfs, &nls, nsfs, &inds)); 2421 PetscCall(PetscLayoutCreate(comm, &glayout)); 2422 glayout->bs = 1; 2423 glayout->n = 0; 2424 glayout->N = 0; 2425 for (s = 0; s < nsfs; s++) { 2426 PetscCall(PetscSFGetGraphLayout(sfs[s], &lts[s], &nls[s], NULL, &inds[s])); 2427 glayout->n += lts[s]->n; 2428 glayout->N += lts[s]->N; 2429 } 2430 PetscCall(PetscLayoutSetUp(glayout)); 2431 PetscCall(PetscMalloc1(nLeaves, &gremote)); 2432 for (s = 0, j = 0; s < nsfs; s++) { 2433 for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset; 2434 rootOffset += lts[s]->N; 2435 PetscCall(PetscLayoutDestroy(&lts[s])); 2436 PetscCall(PetscFree(inds[s])); 2437 } 2438 PetscCall(PetscFree3(lts, nls, inds)); 2439 nRoots = glayout->N; 2440 } break; 2441 case PETSCSF_CONCATENATE_ROOTMODE_LOCAL: 2442 /* nRoots calculated later in this case */ 2443 break; 2444 default: 2445 SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode); 2446 } 2447 2448 if (!leafOffsets) { 2449 all_ilocal_null = PETSC_TRUE; 2450 for (s = 0; s < nsfs; s++) { 2451 const PetscInt *ilocal; 2452 2453 PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL)); 2454 if (ilocal) { 2455 all_ilocal_null = PETSC_FALSE; 2456 break; 2457 } 2458 } 2459 PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL"); 2460 } 2461 2462 /* Renumber and concatenate local leaves */ 2463 ilocal_new = NULL; 2464 if (!all_ilocal_null) { 2465 PetscCall(PetscMalloc1(nLeaves, &ilocal_new)); 2466 for (i = 0; i < 
nLeaves; i++) ilocal_new[i] = -1; 2467 for (s = 0; s < nsfs; s++) { 2468 const PetscInt *ilocal; 2469 PetscInt *ilocal_l = &ilocal_new[leafArrayOffsets[s]]; 2470 PetscInt i, nleaves_l; 2471 2472 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL)); 2473 for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s]; 2474 } 2475 } 2476 2477 /* Renumber and concatenate remote roots */ 2478 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) { 2479 PetscInt rootOffset = 0; 2480 2481 PetscCall(PetscMalloc1(nLeaves, &iremote_new)); 2482 for (i = 0; i < nLeaves; i++) { 2483 iremote_new[i].rank = -1; 2484 iremote_new[i].index = -1; 2485 } 2486 for (s = 0; s < nsfs; s++) { 2487 PetscInt i, nl, nr; 2488 PetscSF tmp_sf; 2489 const PetscSFNode *iremote; 2490 PetscSFNode *tmp_rootdata; 2491 PetscSFNode *tmp_leafdata = &iremote_new[leafArrayOffsets[s]]; 2492 2493 PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote)); 2494 PetscCall(PetscSFCreate(comm, &tmp_sf)); 2495 /* create helper SF with contiguous leaves */ 2496 PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES)); 2497 PetscCall(PetscSFSetUp(tmp_sf)); 2498 PetscCall(PetscMalloc1(nr, &tmp_rootdata)); 2499 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) { 2500 for (i = 0; i < nr; i++) { 2501 tmp_rootdata[i].index = i + rootOffset; 2502 tmp_rootdata[i].rank = (PetscInt)rank; 2503 } 2504 rootOffset += nr; 2505 } else { 2506 for (i = 0; i < nr; i++) { 2507 tmp_rootdata[i].index = i; 2508 tmp_rootdata[i].rank = (PetscInt)rank; 2509 } 2510 } 2511 PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2512 PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2513 PetscCall(PetscSFDestroy(&tmp_sf)); 2514 PetscCall(PetscFree(tmp_rootdata)); 2515 } 2516 if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) { nRoots = rootOffset; } // else nRoots already calculated above 2517 2518 /* Build the new SF */ 2519 PetscCall(PetscSFCreate(comm, newsf)); 2520 PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER)); 2521 } else { 2522 /* Build the new SF */ 2523 PetscCall(PetscSFCreate(comm, newsf)); 2524 PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote)); 2525 } 2526 PetscCall(PetscSFSetUp(*newsf)); 2527 PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view")); 2528 PetscCall(PetscLayoutDestroy(&glayout)); 2529 PetscCall(PetscFree(gremote)); 2530 PetscCall(PetscFree(leafArrayOffsets)); 2531 PetscFunctionReturn(0); 2532 } 2533