1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/hashseti.h> 3 #include <petsc/private/viewerimpl.h> 4 #include <petscctable.h> 5 6 #if defined(PETSC_HAVE_CUDA) 7 #include <cuda_runtime.h> 8 #endif 9 10 #if defined(PETSC_HAVE_HIP) 11 #include <hip/hip_runtime.h> 12 #endif 13 14 #if defined(PETSC_CLANG_STATIC_ANALYZER) 15 void PetscSFCheckGraphSet(PetscSF,int); 16 #else 17 #if defined(PETSC_USE_DEBUG) 18 # define PetscSFCheckGraphSet(sf,arg) do { \ 19 if (PetscUnlikely(!(sf)->graphset)) \ 20 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \ 21 } while (0) 22 #else 23 # define PetscSFCheckGraphSet(sf,arg) do {} while (0) 24 #endif 25 #endif 26 27 const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL}; 28 29 /*@ 30 PetscSFCreate - create a star forest communication context 31 32 Collective 33 34 Input Parameter: 35 . comm - communicator on which the star forest will operate 36 37 Output Parameter: 38 . sf - new star forest context 39 40 Options Database Keys: 41 + -sf_type basic -Use MPI persistent Isend/Irecv for communication (Default) 42 . -sf_type window -Use MPI-3 one-sided window for communication 43 - -sf_type neighbor -Use MPI-3 neighborhood collectives for communication 44 45 Level: intermediate 46 47 Notes: 48 When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv, 49 MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special 50 SFs are optimized and they have better performance than general SFs. 
51 52 .seealso: `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()` 53 @*/ 54 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf) 55 { 56 PetscSF b; 57 58 PetscFunctionBegin; 59 PetscValidPointer(sf,2); 60 PetscCall(PetscSFInitializePackage()); 61 62 PetscCall(PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView)); 63 64 b->nroots = -1; 65 b->nleaves = -1; 66 b->minleaf = PETSC_MAX_INT; 67 b->maxleaf = PETSC_MIN_INT; 68 b->nranks = -1; 69 b->rankorder = PETSC_TRUE; 70 b->ingroup = MPI_GROUP_NULL; 71 b->outgroup = MPI_GROUP_NULL; 72 b->graphset = PETSC_FALSE; 73 #if defined(PETSC_HAVE_DEVICE) 74 b->use_gpu_aware_mpi = use_gpu_aware_mpi; 75 b->use_stream_aware_mpi = PETSC_FALSE; 76 b->unknown_input_stream= PETSC_FALSE; 77 #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 78 b->backend = PETSCSF_BACKEND_KOKKOS; 79 #elif defined(PETSC_HAVE_CUDA) 80 b->backend = PETSCSF_BACKEND_CUDA; 81 #elif defined(PETSC_HAVE_HIP) 82 b->backend = PETSCSF_BACKEND_HIP; 83 #endif 84 85 #if defined(PETSC_HAVE_NVSHMEM) 86 b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */ 87 b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */ 88 PetscCall(PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL)); 89 PetscCall(PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL)); 90 #endif 91 #endif 92 b->vscat.from_n = -1; 93 b->vscat.to_n = -1; 94 b->vscat.unit = MPIU_SCALAR; 95 *sf = b; 96 PetscFunctionReturn(0); 97 } 98 99 /*@ 100 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 101 102 Collective 103 104 Input Parameter: 105 . 
sf - star forest 106 107 Level: advanced 108 109 .seealso: `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()` 110 @*/ 111 PetscErrorCode PetscSFReset(PetscSF sf) 112 { 113 PetscFunctionBegin; 114 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 115 if (sf->ops->Reset) PetscCall((*sf->ops->Reset)(sf)); 116 sf->nroots = -1; 117 sf->nleaves = -1; 118 sf->minleaf = PETSC_MAX_INT; 119 sf->maxleaf = PETSC_MIN_INT; 120 sf->mine = NULL; 121 sf->remote = NULL; 122 sf->graphset = PETSC_FALSE; 123 PetscCall(PetscFree(sf->mine_alloc)); 124 PetscCall(PetscFree(sf->remote_alloc)); 125 sf->nranks = -1; 126 PetscCall(PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote)); 127 sf->degreeknown = PETSC_FALSE; 128 PetscCall(PetscFree(sf->degree)); 129 if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup)); 130 if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup)); 131 if (sf->multi) sf->multi->multi = NULL; 132 PetscCall(PetscSFDestroy(&sf->multi)); 133 PetscCall(PetscLayoutDestroy(&sf->map)); 134 135 #if defined(PETSC_HAVE_DEVICE) 136 for (PetscInt i=0; i<2; i++) PetscCall(PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i])); 137 #endif 138 139 sf->setupcalled = PETSC_FALSE; 140 PetscFunctionReturn(0); 141 } 142 143 /*@C 144 PetscSFSetType - Set the PetscSF communication implementation 145 146 Collective on PetscSF 147 148 Input Parameters: 149 + sf - the PetscSF context 150 - type - a known method 151 152 Options Database Key: 153 . 
-sf_type <type> - Sets the method; use -help for a list 154 of available methods (for instance, window, basic, neighbor) 155 156 Notes: 157 See "include/petscsf.h" for available methods (for instance) 158 + PETSCSFWINDOW - MPI-2/3 one-sided 159 - PETSCSFBASIC - basic implementation using MPI-1 two-sided 160 161 Level: intermediate 162 163 .seealso: `PetscSFType`, `PetscSFCreate()` 164 @*/ 165 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type) 166 { 167 PetscBool match; 168 PetscErrorCode (*r)(PetscSF); 169 170 PetscFunctionBegin; 171 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 172 PetscValidCharPointer(type,2); 173 174 PetscCall(PetscObjectTypeCompare((PetscObject)sf,type,&match)); 175 if (match) PetscFunctionReturn(0); 176 177 PetscCall(PetscFunctionListFind(PetscSFList,type,&r)); 178 PetscCheck(r,PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type); 179 /* Destroy the previous PetscSF implementation context */ 180 if (sf->ops->Destroy) PetscCall((*(sf)->ops->Destroy)(sf)); 181 PetscCall(PetscMemzero(sf->ops,sizeof(*sf->ops))); 182 PetscCall(PetscObjectChangeTypeName((PetscObject)sf,type)); 183 PetscCall((*r)(sf)); 184 PetscFunctionReturn(0); 185 } 186 187 /*@C 188 PetscSFGetType - Get the PetscSF communication implementation 189 190 Not Collective 191 192 Input Parameter: 193 . sf - the PetscSF context 194 195 Output Parameter: 196 . type - the PetscSF type name 197 198 Level: intermediate 199 200 .seealso: `PetscSFSetType()`, `PetscSFCreate()` 201 @*/ 202 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 203 { 204 PetscFunctionBegin; 205 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1); 206 PetscValidPointer(type,2); 207 *type = ((PetscObject)sf)->type_name; 208 PetscFunctionReturn(0); 209 } 210 211 /*@C 212 PetscSFDestroy - destroy star forest 213 214 Collective 215 216 Input Parameter: 217 . 
sf - address of star forest 218 219 Level: intermediate 220 221 .seealso: `PetscSFCreate()`, `PetscSFReset()` 222 @*/ 223 PetscErrorCode PetscSFDestroy(PetscSF *sf) 224 { 225 PetscFunctionBegin; 226 if (!*sf) PetscFunctionReturn(0); 227 PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1); 228 if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);} 229 PetscCall(PetscSFReset(*sf)); 230 if ((*sf)->ops->Destroy) PetscCall((*(*sf)->ops->Destroy)(*sf)); 231 PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf)); 232 if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit)); 233 PetscCall(PetscHeaderDestroy(sf)); 234 PetscFunctionReturn(0); 235 } 236 237 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 238 { 239 PetscInt i, nleaves; 240 PetscMPIInt size; 241 const PetscInt *ilocal; 242 const PetscSFNode *iremote; 243 244 PetscFunctionBegin; 245 if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0); 246 PetscCall(PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote)); 247 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size)); 248 for (i = 0; i < nleaves; i++) { 249 const PetscInt rank = iremote[i].rank; 250 const PetscInt remote = iremote[i].index; 251 const PetscInt leaf = ilocal ? ilocal[i] : i; 252 PetscCheck(rank >= 0 && rank < size,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)",rank,i,size); 253 PetscCheck(remote >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0",remote,i); 254 PetscCheck(leaf >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0",leaf,i); 255 } 256 PetscFunctionReturn(0); 257 } 258 259 /*@ 260 PetscSFSetUp - set up communication structures 261 262 Collective 263 264 Input Parameter: 265 . 
sf - star forest communication object 266 267 Level: beginner 268 269 .seealso: `PetscSFSetFromOptions()`, `PetscSFSetType()` 270 @*/ 271 PetscErrorCode PetscSFSetUp(PetscSF sf) 272 { 273 PetscFunctionBegin; 274 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 275 PetscSFCheckGraphSet(sf,1); 276 if (sf->setupcalled) PetscFunctionReturn(0); 277 PetscCall(PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0)); 278 PetscCall(PetscSFCheckGraphValid_Private(sf)); 279 if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf,PETSCSFBASIC)); /* Zero all sf->ops */ 280 if (sf->ops->SetUp) PetscCall((*sf->ops->SetUp)(sf)); 281 #if defined(PETSC_HAVE_CUDA) 282 if (sf->backend == PETSCSF_BACKEND_CUDA) { 283 sf->ops->Malloc = PetscSFMalloc_CUDA; 284 sf->ops->Free = PetscSFFree_CUDA; 285 } 286 #endif 287 #if defined(PETSC_HAVE_HIP) 288 if (sf->backend == PETSCSF_BACKEND_HIP) { 289 sf->ops->Malloc = PetscSFMalloc_HIP; 290 sf->ops->Free = PetscSFFree_HIP; 291 } 292 #endif 293 294 # 295 #if defined(PETSC_HAVE_KOKKOS) 296 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 297 sf->ops->Malloc = PetscSFMalloc_Kokkos; 298 sf->ops->Free = PetscSFFree_Kokkos; 299 } 300 #endif 301 PetscCall(PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0)); 302 sf->setupcalled = PETSC_TRUE; 303 PetscFunctionReturn(0); 304 } 305 306 /*@ 307 PetscSFSetFromOptions - set PetscSF options using the options database 308 309 Logically Collective 310 311 Input Parameter: 312 . sf - star forest 313 314 Options Database Keys: 315 + -sf_type - implementation type, see PetscSFSetType() 316 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 317 . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also 318 use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true). 319 If true, this option only works with -use_gpu_aware_mpi 1. 320 . 
-sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false). 321 If true, this option only works with -use_gpu_aware_mpi 1. 322 323 - -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos. 324 On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices, 325 the only available is kokkos. 326 327 Level: intermediate 328 @*/ 329 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 330 { 331 PetscSFType deft; 332 char type[256]; 333 PetscBool flg; 334 335 PetscFunctionBegin; 336 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 337 PetscObjectOptionsBegin((PetscObject)sf); 338 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 339 PetscCall(PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg)); 340 PetscCall(PetscSFSetType(sf,flg ? 
type : deft)); 341 PetscCall(PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL)); 342 #if defined(PETSC_HAVE_DEVICE) 343 { 344 char backendstr[32] = {0}; 345 PetscBool isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set; 346 /* Change the defaults set in PetscSFCreate() with command line options */ 347 PetscCall(PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL)); 348 PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL)); 349 PetscCall(PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set)); 350 PetscCall(PetscStrcasecmp("cuda",backendstr,&isCuda)); 351 PetscCall(PetscStrcasecmp("kokkos",backendstr,&isKokkos)); 352 PetscCall(PetscStrcasecmp("hip",backendstr,&isHip)); 353 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 354 if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 355 else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 356 else if (isHip) sf->backend = PETSCSF_BACKEND_HIP; 357 else PetscCheck(!set,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr); 358 #elif defined(PETSC_HAVE_KOKKOS) 359 PetscCheck(!set || isKokkos,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. 
You can only choose kokkos", backendstr); 360 #endif 361 } 362 #endif 363 if (sf->ops->SetFromOptions) PetscCall((*sf->ops->SetFromOptions)(PetscOptionsObject,sf)); 364 PetscOptionsEnd(); 365 PetscFunctionReturn(0); 366 } 367 368 /*@ 369 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 370 371 Logically Collective 372 373 Input Parameters: 374 + sf - star forest 375 - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic) 376 377 Level: advanced 378 379 .seealso: `PetscSFGatherBegin()`, `PetscSFScatterBegin()` 380 @*/ 381 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg) 382 { 383 PetscFunctionBegin; 384 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 385 PetscValidLogicalCollectiveBool(sf,flg,2); 386 PetscCheck(!sf->multi,PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 387 sf->rankorder = flg; 388 PetscFunctionReturn(0); 389 } 390 391 /*@C 392 PetscSFSetGraph - Set a parallel star forest 393 394 Collective 395 396 Input Parameters: 397 + sf - star forest 398 . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 399 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 400 . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced 401 during setup in debug mode) 402 . localmode - copy mode for ilocal 403 . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 404 during setup in debug mode) 405 - remotemode - copy mode for iremote 406 407 Level: intermediate 408 409 Notes: 410 Leaf indices in ilocal must be unique, otherwise an error occurs. 411 412 Input arrays ilocal and iremote follow the PetscCopyMode semantics. 
413 In particular, if localmode/remotemode is PETSC_OWN_POINTER or PETSC_USE_POINTER, 414 PETSc might modify the respective array; 415 if PETSC_USE_POINTER, the user must delete the array after PetscSFDestroy(). 416 Only if PETSC_COPY_VALUES is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed). 417 418 Fortran Notes: 419 In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode. 420 421 Developer Notes: 422 We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf. 423 This also allows to compare leaf sets of two SFs easily. 424 425 .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()` 426 @*/ 427 PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,PetscInt *ilocal,PetscCopyMode localmode,PetscSFNode *iremote,PetscCopyMode remotemode) 428 { 429 PetscBool unique, contiguous; 430 431 PetscFunctionBegin; 432 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 433 if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4); 434 if (nleaves > 0) PetscValidPointer(iremote,6); 435 PetscCheck(nroots >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %" PetscInt_FMT ", cannot be negative",nroots); 436 PetscCheck(nleaves >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %" PetscInt_FMT ", cannot be negative",nleaves); 437 /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast 438 * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */ 439 PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong localmode %d",localmode); 440 PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong remotemode %d",remotemode); 441 442 if (sf->nroots >= 0) { /* Reset only if graph already set */ 443 PetscCall(PetscSFReset(sf)); 444 } 
445 446 PetscCall(PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0)); 447 448 sf->nroots = nroots; 449 sf->nleaves = nleaves; 450 451 if (localmode == PETSC_COPY_VALUES && ilocal) { 452 PetscInt *tlocal = NULL; 453 454 PetscCall(PetscMalloc1(nleaves,&tlocal)); 455 PetscCall(PetscArraycpy(tlocal,ilocal,nleaves)); 456 ilocal = tlocal; 457 } 458 if (remotemode == PETSC_COPY_VALUES) { 459 PetscSFNode *tremote = NULL; 460 461 PetscCall(PetscMalloc1(nleaves,&tremote)); 462 PetscCall(PetscArraycpy(tremote,iremote,nleaves)); 463 iremote = tremote; 464 } 465 466 if (nleaves && ilocal) { 467 PetscSFNode work; 468 469 PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work)); 470 PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique)); 471 unique = PetscNot(unique); 472 PetscCheck(sf->allow_multi_leaves || unique,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Input ilocal has duplicate entries which is not allowed for this PetscSF"); 473 sf->minleaf = ilocal[0]; 474 sf->maxleaf = ilocal[nleaves-1]; 475 contiguous = (PetscBool) (unique && ilocal[0] == 0 && ilocal[nleaves-1] == nleaves-1); 476 } else { 477 sf->minleaf = 0; 478 sf->maxleaf = nleaves - 1; 479 unique = PETSC_TRUE; 480 contiguous = PETSC_TRUE; 481 } 482 483 if (contiguous) { 484 if (localmode == PETSC_USE_POINTER) { 485 ilocal = NULL; 486 } else { 487 PetscCall(PetscFree(ilocal)); 488 } 489 } 490 sf->mine = ilocal; 491 if (localmode == PETSC_USE_POINTER) { 492 sf->mine_alloc = NULL; 493 } else { 494 sf->mine_alloc = ilocal; 495 } 496 sf->remote = iremote; 497 if (remotemode == PETSC_USE_POINTER) { 498 sf->remote_alloc = NULL; 499 } else { 500 sf->remote_alloc = iremote; 501 } 502 PetscCall(PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0)); 503 sf->graphset = PETSC_TRUE; 504 PetscFunctionReturn(0); 505 } 506 507 /*@ 508 PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern 509 510 Collective 511 512 Input Parameters: 513 + sf - The PetscSF 514 . 
map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL) 515 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL 516 517 Notes: 518 It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map. 519 n and N are local and global sizes of x respectively. 520 521 With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to 522 sequential vectors y on all ranks. 523 524 With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a 525 sequential vector y on rank 0. 526 527 In above cases, entries of x are roots and entries of y are leaves. 528 529 With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine 530 creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 531 of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does 532 not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data 533 items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines. 534 535 In this case, roots and leaves are symmetric. 
536 537 Level: intermediate 538 @*/ 539 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern) 540 { 541 MPI_Comm comm; 542 PetscInt n,N,res[2]; 543 PetscMPIInt rank,size; 544 PetscSFType type; 545 546 PetscFunctionBegin; 547 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 548 if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map,2); 549 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 550 PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL,comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %d",pattern); 551 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 552 PetscCallMPI(MPI_Comm_size(comm,&size)); 553 554 if (pattern == PETSCSF_PATTERN_ALLTOALL) { 555 type = PETSCSFALLTOALL; 556 PetscCall(PetscLayoutCreate(comm,&sf->map)); 557 PetscCall(PetscLayoutSetLocalSize(sf->map,size)); 558 PetscCall(PetscLayoutSetSize(sf->map,((PetscInt)size)*size)); 559 PetscCall(PetscLayoutSetUp(sf->map)); 560 } else { 561 PetscCall(PetscLayoutGetLocalSize(map,&n)); 562 PetscCall(PetscLayoutGetSize(map,&N)); 563 res[0] = n; 564 res[1] = -n; 565 /* Check if n are same over all ranks so that we can optimize it */ 566 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm)); 567 if (res[0] == -res[1]) { /* same n */ 568 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 569 } else { 570 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 571 } 572 PetscCall(PetscLayoutReference(map,&sf->map)); 573 } 574 PetscCall(PetscSFSetType(sf,type)); 575 576 sf->pattern = pattern; 577 sf->mine = NULL; /* Contiguous */ 578 579 /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 580 Also set other easy stuff. 
581 */ 582 if (pattern == PETSCSF_PATTERN_ALLGATHER) { 583 sf->nleaves = N; 584 sf->nroots = n; 585 sf->nranks = size; 586 sf->minleaf = 0; 587 sf->maxleaf = N - 1; 588 } else if (pattern == PETSCSF_PATTERN_GATHER) { 589 sf->nleaves = rank ? 0 : N; 590 sf->nroots = n; 591 sf->nranks = rank ? 0 : size; 592 sf->minleaf = 0; 593 sf->maxleaf = rank ? -1 : N - 1; 594 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 595 sf->nleaves = size; 596 sf->nroots = size; 597 sf->nranks = size; 598 sf->minleaf = 0; 599 sf->maxleaf = size - 1; 600 } 601 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 602 sf->graphset = PETSC_TRUE; 603 PetscFunctionReturn(0); 604 } 605 606 /*@ 607 PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map 608 609 Collective 610 611 Input Parameter: 612 . sf - star forest to invert 613 614 Output Parameter: 615 . isf - inverse of sf 616 617 Level: advanced 618 619 Notes: 620 All roots must have degree 1. 621 622 The local space may be a permutation, but cannot be sparse. 
623 624 .seealso: `PetscSFSetGraph()` 625 @*/ 626 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf) 627 { 628 PetscMPIInt rank; 629 PetscInt i,nroots,nleaves,maxlocal,count,*newilocal; 630 const PetscInt *ilocal; 631 PetscSFNode *roots,*leaves; 632 633 PetscFunctionBegin; 634 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 635 PetscSFCheckGraphSet(sf,1); 636 PetscValidPointer(isf,2); 637 638 PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL)); 639 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 640 641 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank)); 642 PetscCall(PetscMalloc2(nroots,&roots,maxlocal,&leaves)); 643 for (i=0; i<maxlocal; i++) { 644 leaves[i].rank = rank; 645 leaves[i].index = i; 646 } 647 for (i=0; i <nroots; i++) { 648 roots[i].rank = -1; 649 roots[i].index = -1; 650 } 651 PetscCall(PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE)); 652 PetscCall(PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE)); 653 654 /* Check whether our leaves are sparse */ 655 for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++; 656 if (count == nroots) newilocal = NULL; 657 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). 
*/ 658 PetscCall(PetscMalloc1(count,&newilocal)); 659 for (i=0,count=0; i<nroots; i++) { 660 if (roots[i].rank >= 0) { 661 newilocal[count] = i; 662 roots[count].rank = roots[i].rank; 663 roots[count].index = roots[i].index; 664 count++; 665 } 666 } 667 } 668 669 PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf)); 670 PetscCall(PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES)); 671 PetscCall(PetscFree2(roots,leaves)); 672 PetscFunctionReturn(0); 673 } 674 675 /*@ 676 PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph 677 678 Collective 679 680 Input Parameters: 681 + sf - communication object to duplicate 682 - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption) 683 684 Output Parameter: 685 . newsf - new communication object 686 687 Level: beginner 688 689 .seealso: `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()` 690 @*/ 691 PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf) 692 { 693 PetscSFType type; 694 MPI_Datatype dtype=MPIU_SCALAR; 695 696 PetscFunctionBegin; 697 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 698 PetscValidLogicalCollectiveEnum(sf,opt,2); 699 PetscValidPointer(newsf,3); 700 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf)); 701 PetscCall(PetscSFGetType(sf,&type)); 702 if (type) PetscCall(PetscSFSetType(*newsf,type)); 703 (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag ealier since PetscSFSetGraph() below checks on this flag */ 704 if (opt == PETSCSF_DUPLICATE_GRAPH) { 705 PetscSFCheckGraphSet(sf,1); 706 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 707 PetscInt nroots,nleaves; 708 const PetscInt *ilocal; 709 const PetscSFNode *iremote; 710 PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote)); 711 
PetscCall(PetscSFSetGraph(*newsf,nroots,nleaves,(PetscInt*)ilocal,PETSC_COPY_VALUES,(PetscSFNode*)iremote,PETSC_COPY_VALUES)); 712 } else { 713 PetscCall(PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern)); 714 } 715 } 716 /* Since oldtype is committed, so is newtype, according to MPI */ 717 if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit,&dtype)); 718 (*newsf)->vscat.bs = sf->vscat.bs; 719 (*newsf)->vscat.unit = dtype; 720 (*newsf)->vscat.to_n = sf->vscat.to_n; 721 (*newsf)->vscat.from_n = sf->vscat.from_n; 722 /* Do not copy lsf. Build it on demand since it is rarely used */ 723 724 #if defined(PETSC_HAVE_DEVICE) 725 (*newsf)->backend = sf->backend; 726 (*newsf)->unknown_input_stream= sf->unknown_input_stream; 727 (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi; 728 (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi; 729 #endif 730 if (sf->ops->Duplicate) PetscCall((*sf->ops->Duplicate)(sf,opt,*newsf)); 731 /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */ 732 PetscFunctionReturn(0); 733 } 734 735 /*@C 736 PetscSFGetGraph - Get the graph specifying a parallel star forest 737 738 Not Collective 739 740 Input Parameter: 741 . sf - star forest 742 743 Output Parameters: 744 + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 745 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 746 . ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage) 747 - iremote - remote locations of root vertices for each leaf on the current process 748 749 Notes: 750 We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet 751 752 The returned ilocal/iremote might contain values in different order than the input ones in PetscSFSetGraph(), 753 see its manpage for details. 
754 755 Fortran Notes: 756 The returned iremote array is a copy and must be deallocated after use. Consequently, if you 757 want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array. 758 759 To check for a NULL ilocal use 760 $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then 761 762 Level: intermediate 763 764 .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()` 765 @*/ 766 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 767 { 768 PetscFunctionBegin; 769 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 770 if (sf->ops->GetGraph) { 771 PetscCall((sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote)); 772 } else { 773 if (nroots) *nroots = sf->nroots; 774 if (nleaves) *nleaves = sf->nleaves; 775 if (ilocal) *ilocal = sf->mine; 776 if (iremote) *iremote = sf->remote; 777 } 778 PetscFunctionReturn(0); 779 } 780 781 /*@ 782 PetscSFGetLeafRange - Get the active leaf ranges 783 784 Not Collective 785 786 Input Parameter: 787 . sf - star forest 788 789 Output Parameters: 790 + minleaf - minimum active leaf on this process. Return 0 if there are no leaves. 791 - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves. 792 793 Level: developer 794 795 .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 796 @*/ 797 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf) 798 { 799 PetscFunctionBegin; 800 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 801 PetscSFCheckGraphSet(sf,1); 802 if (minleaf) *minleaf = sf->minleaf; 803 if (maxleaf) *maxleaf = sf->maxleaf; 804 PetscFunctionReturn(0); 805 } 806 807 /*@C 808 PetscSFViewFromOptions - View from Options 809 810 Collective on PetscSF 811 812 Input Parameters: 813 + A - the star forest 814 . 
   obj - Optional object
-  name - command line option

   Level: intermediate

.seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
  /* Thin convenience wrapper; all option processing happens in PetscObjectViewFromOptions() */
  PetscCall(PetscObjectViewFromOptions((PetscObject)A,obj,name));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFView - view a star forest

   Collective

   Input Parameters:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: `PetscSFCreate()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer));
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Set up first so rank/offset arrays below are populated; skipped when no graph was given */
  if (sf->graphset) PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer));
    PetscCall(PetscViewerASCIIPushTab(viewer));
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        PetscCall(PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n"));
        PetscCall(PetscViewerASCIIPopTab(viewer));
        PetscFunctionReturn(0);
      }
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
      /* Synchronized printing so each rank's edges appear as a contiguous group */
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n",rank,sf->nroots,sf->nleaves,sf->nranks));
      for (i=0; i<sf->nleaves; i++) {
        /* NULL sf->mine means the identity ilocal map */
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index));
      }
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerGetFormat(viewer,&format));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;

        /* Sort a copy of the rank list so output is ordered by rank; perm maps back to storage order */
        PetscCall(PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm));
        PetscCall(PetscArraycpy(tmpranks,sf->ranks,sf->nranks));
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        PetscCall(PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank));
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %" PetscInt_FMT " edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]));
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n",rank,sf->rmine[j],sf->rremote[j]));
          }
        }
        PetscCall(PetscFree2(tmpranks,perm));
      }
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
    }
    PetscCall(PetscViewerASCIIPopTab(viewer));
  }
  /* Let the implementation append type-specific information */
  if (sf->ops->View) PetscCall((*sf->ops->View)(sf,viewer));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
.  roffset - offset in rmine/rremote for each rank (length nranks+1)
.
rmine - concatenated array holding local indices referencing each remote rank 911 - rremote - concatenated array holding remote indices referenced for each remote rank 912 913 Level: developer 914 915 .seealso: `PetscSFGetLeafRanks()` 916 @*/ 917 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote) 918 { 919 PetscFunctionBegin; 920 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 921 PetscCheck(sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 922 if (sf->ops->GetRootRanks) { 923 PetscCall((sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote)); 924 } else { 925 /* The generic implementation */ 926 if (nranks) *nranks = sf->nranks; 927 if (ranks) *ranks = sf->ranks; 928 if (roffset) *roffset = sf->roffset; 929 if (rmine) *rmine = sf->rmine; 930 if (rremote) *rremote = sf->rremote; 931 } 932 PetscFunctionReturn(0); 933 } 934 935 /*@C 936 PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 937 938 Not Collective 939 940 Input Parameter: 941 . sf - star forest 942 943 Output Parameters: 944 + niranks - number of leaf ranks referencing roots on this process 945 . iranks - array of ranks 946 . 
ioffset - offset in irootloc for each rank (length niranks+1) 947 - irootloc - concatenated array holding local indices of roots referenced by each leaf rank 948 949 Level: developer 950 951 .seealso: `PetscSFGetRootRanks()` 952 @*/ 953 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 954 { 955 PetscFunctionBegin; 956 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 957 PetscCheck(sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 958 if (sf->ops->GetLeafRanks) { 959 PetscCall((sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc)); 960 } else { 961 PetscSFType type; 962 PetscCall(PetscSFGetType(sf,&type)); 963 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 964 } 965 PetscFunctionReturn(0); 966 } 967 968 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) { 969 PetscInt i; 970 for (i=0; i<n; i++) { 971 if (needle == list[i]) return PETSC_TRUE; 972 } 973 return PETSC_FALSE; 974 } 975 976 /*@C 977 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations. 
978 979 Collective 980 981 Input Parameters: 982 + sf - PetscSF to set up; PetscSFSetGraph() must have been called 983 - dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange) 984 985 Level: developer 986 987 .seealso: `PetscSFGetRootRanks()` 988 @*/ 989 PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup) 990 { 991 PetscTable table; 992 PetscTablePosition pos; 993 PetscMPIInt size,groupsize,*groupranks; 994 PetscInt *rcount,*ranks; 995 PetscInt i, irank = -1,orank = -1; 996 997 PetscFunctionBegin; 998 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 999 PetscSFCheckGraphSet(sf,1); 1000 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size)); 1001 PetscCall(PetscTableCreate(10,size,&table)); 1002 for (i=0; i<sf->nleaves; i++) { 1003 /* Log 1-based rank */ 1004 PetscCall(PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES)); 1005 } 1006 PetscCall(PetscTableGetCount(table,&sf->nranks)); 1007 PetscCall(PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote)); 1008 PetscCall(PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks)); 1009 PetscCall(PetscTableGetHeadPosition(table,&pos)); 1010 for (i=0; i<sf->nranks; i++) { 1011 PetscCall(PetscTableGetNext(table,&pos,&ranks[i],&rcount[i])); 1012 ranks[i]--; /* Convert back to 0-based */ 1013 } 1014 PetscCall(PetscTableDestroy(&table)); 1015 1016 /* We expect that dgroup is reliably "small" while nranks could be large */ 1017 { 1018 MPI_Group group = MPI_GROUP_NULL; 1019 PetscMPIInt *dgroupranks; 1020 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group)); 1021 PetscCallMPI(MPI_Group_size(dgroup,&groupsize)); 1022 PetscCall(PetscMalloc1(groupsize,&dgroupranks)); 1023 PetscCall(PetscMalloc1(groupsize,&groupranks)); 1024 for (i=0; i<groupsize; i++) dgroupranks[i] = i; 1025 if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks)); 1026 
PetscCallMPI(MPI_Group_free(&group)); 1027 PetscCall(PetscFree(dgroupranks)); 1028 } 1029 1030 /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */ 1031 for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) { 1032 for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */ 1033 if (InList(ranks[i],groupsize,groupranks)) break; 1034 } 1035 for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */ 1036 if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break; 1037 } 1038 if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */ 1039 PetscInt tmprank,tmpcount; 1040 1041 tmprank = ranks[i]; 1042 tmpcount = rcount[i]; 1043 ranks[i] = ranks[sf->ndranks]; 1044 rcount[i] = rcount[sf->ndranks]; 1045 ranks[sf->ndranks] = tmprank; 1046 rcount[sf->ndranks] = tmpcount; 1047 sf->ndranks++; 1048 } 1049 } 1050 PetscCall(PetscFree(groupranks)); 1051 PetscCall(PetscSortIntWithArray(sf->ndranks,ranks,rcount)); 1052 PetscCall(PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks)); 1053 sf->roffset[0] = 0; 1054 for (i=0; i<sf->nranks; i++) { 1055 PetscCall(PetscMPIIntCast(ranks[i],sf->ranks+i)); 1056 sf->roffset[i+1] = sf->roffset[i] + rcount[i]; 1057 rcount[i] = 0; 1058 } 1059 for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) { 1060 /* short circuit */ 1061 if (orank != sf->remote[i].rank) { 1062 /* Search for index of iremote[i].rank in sf->ranks */ 1063 PetscCall(PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank)); 1064 if (irank < 0) { 1065 PetscCall(PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank)); 1066 if (irank >= 0) irank += sf->ndranks; 1067 } 1068 orank = sf->remote[i].rank; 1069 } 1070 PetscCheck(irank >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %" PetscInt_FMT " in array",sf->remote[i].rank); 1071 sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? 
sf->mine[i] : i; 1072 sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index; 1073 rcount[irank]++; 1074 } 1075 PetscCall(PetscFree2(rcount,ranks)); 1076 PetscFunctionReturn(0); 1077 } 1078 1079 /*@C 1080 PetscSFGetGroups - gets incoming and outgoing process groups 1081 1082 Collective 1083 1084 Input Parameter: 1085 . sf - star forest 1086 1087 Output Parameters: 1088 + incoming - group of origin processes for incoming edges (leaves that reference my roots) 1089 - outgoing - group of destination processes for outgoing edges (roots that I reference) 1090 1091 Level: developer 1092 1093 .seealso: `PetscSFGetWindow()`, `PetscSFRestoreWindow()` 1094 @*/ 1095 PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing) 1096 { 1097 MPI_Group group = MPI_GROUP_NULL; 1098 1099 PetscFunctionBegin; 1100 PetscCheck(sf->nranks >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups"); 1101 if (sf->ingroup == MPI_GROUP_NULL) { 1102 PetscInt i; 1103 const PetscInt *indegree; 1104 PetscMPIInt rank,*outranks,*inranks; 1105 PetscSFNode *remote; 1106 PetscSF bgcount; 1107 1108 /* Compute the number of incoming ranks */ 1109 PetscCall(PetscMalloc1(sf->nranks,&remote)); 1110 for (i=0; i<sf->nranks; i++) { 1111 remote[i].rank = sf->ranks[i]; 1112 remote[i].index = 0; 1113 } 1114 PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount)); 1115 PetscCall(PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER)); 1116 PetscCall(PetscSFComputeDegreeBegin(bgcount,&indegree)); 1117 PetscCall(PetscSFComputeDegreeEnd(bgcount,&indegree)); 1118 /* Enumerate the incoming ranks */ 1119 PetscCall(PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks)); 1120 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank)); 1121 for (i=0; i<sf->nranks; i++) outranks[i] = rank; 1122 PetscCall(PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks)); 1123 
PetscCall(PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks)); 1124 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group)); 1125 PetscCallMPI(MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup)); 1126 PetscCallMPI(MPI_Group_free(&group)); 1127 PetscCall(PetscFree2(inranks,outranks)); 1128 PetscCall(PetscSFDestroy(&bgcount)); 1129 } 1130 *incoming = sf->ingroup; 1131 1132 if (sf->outgroup == MPI_GROUP_NULL) { 1133 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group)); 1134 PetscCallMPI(MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup)); 1135 PetscCallMPI(MPI_Group_free(&group)); 1136 } 1137 *outgoing = sf->outgroup; 1138 PetscFunctionReturn(0); 1139 } 1140 1141 /*@ 1142 PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters 1143 1144 Collective 1145 1146 Input Parameter: 1147 . sf - star forest that may contain roots with 0 or with more than 1 vertex 1148 1149 Output Parameter: 1150 . multi - star forest with split roots, such that each root has degree exactly 1 1151 1152 Level: developer 1153 1154 Notes: 1155 1156 In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi 1157 directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming 1158 edge, it is a candidate for future optimization that might involve its removal. 1159 1160 .seealso: `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()` 1161 @*/ 1162 PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi) 1163 { 1164 PetscFunctionBegin; 1165 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1166 PetscValidPointer(multi,2); 1167 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? 
*/ 1168 PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi)); 1169 *multi = sf->multi; 1170 sf->multi->multi = sf->multi; 1171 PetscFunctionReturn(0); 1172 } 1173 if (!sf->multi) { 1174 const PetscInt *indegree; 1175 PetscInt i,*inoffset,*outones,*outoffset,maxlocal; 1176 PetscSFNode *remote; 1177 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 1178 PetscCall(PetscSFComputeDegreeBegin(sf,&indegree)); 1179 PetscCall(PetscSFComputeDegreeEnd(sf,&indegree)); 1180 PetscCall(PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset)); 1181 inoffset[0] = 0; 1182 for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i]; 1183 for (i=0; i<maxlocal; i++) outones[i] = 1; 1184 PetscCall(PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM)); 1185 PetscCall(PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM)); 1186 for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1187 if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */ 1188 for (i=0; i<sf->nroots; i++) { 1189 PetscCheck(inoffset[i] + indegree[i] == inoffset[i+1],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp"); 1190 } 1191 } 1192 PetscCall(PetscMalloc1(sf->nleaves,&remote)); 1193 for (i=0; i<sf->nleaves; i++) { 1194 remote[i].rank = sf->remote[i].rank; 1195 remote[i].index = outoffset[sf->mine ? 
sf->mine[i] : i]; 1196 } 1197 PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi)); 1198 sf->multi->multi = sf->multi; 1199 PetscCall(PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER)); 1200 if (sf->rankorder) { /* Sort the ranks */ 1201 PetscMPIInt rank; 1202 PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree; 1203 PetscSFNode *newremote; 1204 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank)); 1205 for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]); 1206 PetscCall(PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset)); 1207 for (i=0; i<maxlocal; i++) outranks[i] = rank; 1208 PetscCall(PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE)); 1209 PetscCall(PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE)); 1210 /* Sort the incoming ranks at each vertex, build the inverse map */ 1211 for (i=0; i<sf->nroots; i++) { 1212 PetscInt j; 1213 for (j=0; j<indegree[i]; j++) tmpoffset[j] = j; 1214 PetscCall(PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset)); 1215 for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1216 } 1217 PetscCall(PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE)); 1218 PetscCall(PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE)); 1219 PetscCall(PetscMalloc1(sf->nleaves,&newremote)); 1220 for (i=0; i<sf->nleaves; i++) { 1221 newremote[i].rank = sf->remote[i].rank; 1222 newremote[i].index = newoutoffset[sf->mine ? 
sf->mine[i] : i]; 1223 } 1224 PetscCall(PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER)); 1225 PetscCall(PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset)); 1226 } 1227 PetscCall(PetscFree3(inoffset,outones,outoffset)); 1228 } 1229 *multi = sf->multi; 1230 PetscFunctionReturn(0); 1231 } 1232 1233 /*@C 1234 PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices 1235 1236 Collective 1237 1238 Input Parameters: 1239 + sf - original star forest 1240 . nselected - number of selected roots on this process 1241 - selected - indices of the selected roots on this process 1242 1243 Output Parameter: 1244 . esf - new star forest 1245 1246 Level: advanced 1247 1248 Note: 1249 To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can 1250 be done by calling PetscSFGetGraph(). 1251 1252 .seealso: `PetscSFSetGraph()`, `PetscSFGetGraph()` 1253 @*/ 1254 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf) 1255 { 1256 PetscInt i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal; 1257 const PetscInt *ilocal; 1258 signed char *rootdata,*leafdata,*leafmem; 1259 const PetscSFNode *iremote; 1260 PetscSFNode *new_iremote; 1261 MPI_Comm comm; 1262 1263 PetscFunctionBegin; 1264 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1265 PetscSFCheckGraphSet(sf,1); 1266 if (nselected) PetscValidIntPointer(selected,3); 1267 PetscValidPointer(esf,4); 1268 1269 PetscCall(PetscSFSetUp(sf)); 1270 PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0)); 1271 PetscCall(PetscObjectGetComm((PetscObject)sf,&comm)); 1272 PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote)); 1273 1274 if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */ 1275 PetscBool dups; 1276 
PetscCall(PetscCheckDupsInt(nselected,selected,&dups)); 1277 PetscCheck(!dups,comm,PETSC_ERR_ARG_WRONG,"selected[] has dups"); 1278 for (i=0; i<nselected; i++) 1279 PetscCheck(selected[i] >= 0 && selected[i] < nroots,comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")",selected[i],nroots); 1280 } 1281 1282 if (sf->ops->CreateEmbeddedRootSF) { 1283 PetscCall((*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf)); 1284 } else { 1285 /* A generic version of creating embedded sf */ 1286 PetscCall(PetscSFGetLeafRange(sf,&minleaf,&maxleaf)); 1287 maxlocal = maxleaf - minleaf + 1; 1288 PetscCall(PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem)); 1289 leafdata = leafmem - minleaf; 1290 /* Tag selected roots and bcast to leaves */ 1291 for (i=0; i<nselected; i++) rootdata[selected[i]] = 1; 1292 PetscCall(PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE)); 1293 PetscCall(PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE)); 1294 1295 /* Build esf with leaves that are still connected */ 1296 esf_nleaves = 0; 1297 for (i=0; i<nleaves; i++) { 1298 j = ilocal ? ilocal[i] : i; 1299 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1300 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1301 */ 1302 esf_nleaves += (leafdata[j] ? 1 : 0); 1303 } 1304 PetscCall(PetscMalloc1(esf_nleaves,&new_ilocal)); 1305 PetscCall(PetscMalloc1(esf_nleaves,&new_iremote)); 1306 for (i=n=0; i<nleaves; i++) { 1307 j = ilocal ? 
ilocal[i] : i; 1308 if (leafdata[j]) { 1309 new_ilocal[n] = j; 1310 new_iremote[n].rank = iremote[i].rank; 1311 new_iremote[n].index = iremote[i].index; 1312 ++n; 1313 } 1314 } 1315 PetscCall(PetscSFCreate(comm,esf)); 1316 PetscCall(PetscSFSetFromOptions(*esf)); 1317 PetscCall(PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER)); 1318 PetscCall(PetscFree2(rootdata,leafmem)); 1319 } 1320 PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0)); 1321 PetscFunctionReturn(0); 1322 } 1323 1324 /*@C 1325 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1326 1327 Collective 1328 1329 Input Parameters: 1330 + sf - original star forest 1331 . nselected - number of selected leaves on this process 1332 - selected - indices of the selected leaves on this process 1333 1334 Output Parameter: 1335 . newsf - new star forest 1336 1337 Level: advanced 1338 1339 .seealso: `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1340 @*/ 1341 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1342 { 1343 const PetscSFNode *iremote; 1344 PetscSFNode *new_iremote; 1345 const PetscInt *ilocal; 1346 PetscInt i,nroots,*leaves,*new_ilocal; 1347 MPI_Comm comm; 1348 1349 PetscFunctionBegin; 1350 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1351 PetscSFCheckGraphSet(sf,1); 1352 if (nselected) PetscValidIntPointer(selected,3); 1353 PetscValidPointer(newsf,4); 1354 1355 /* Uniq selected[] and put results in leaves[] */ 1356 PetscCall(PetscObjectGetComm((PetscObject)sf,&comm)); 1357 PetscCall(PetscMalloc1(nselected,&leaves)); 1358 PetscCall(PetscArraycpy(leaves,selected,nselected)); 1359 PetscCall(PetscSortedRemoveDupsInt(&nselected,leaves)); 1360 PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves),comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not 
in [0,%" PetscInt_FMT ")",leaves[0],leaves[nselected-1],sf->nleaves); 1361 1362 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1363 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) { 1364 PetscCall((*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf)); 1365 } else { 1366 PetscCall(PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote)); 1367 PetscCall(PetscMalloc1(nselected,&new_ilocal)); 1368 PetscCall(PetscMalloc1(nselected,&new_iremote)); 1369 for (i=0; i<nselected; ++i) { 1370 const PetscInt l = leaves[i]; 1371 new_ilocal[i] = ilocal ? ilocal[l] : l; 1372 new_iremote[i].rank = iremote[l].rank; 1373 new_iremote[i].index = iremote[l].index; 1374 } 1375 PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf)); 1376 PetscCall(PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER)); 1377 } 1378 PetscCall(PetscFree(leaves)); 1379 PetscFunctionReturn(0); 1380 } 1381 1382 /*@C 1383 PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd() 1384 1385 Collective on PetscSF 1386 1387 Input Parameters: 1388 + sf - star forest on which to communicate 1389 . unit - data type associated with each node 1390 . rootdata - buffer to broadcast 1391 - op - operation to use for reduction 1392 1393 Output Parameter: 1394 . leafdata - buffer to be reduced with values from each leaf's respective root 1395 1396 Level: intermediate 1397 1398 Notes: 1399 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1400 are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1401 use PetscSFBcastWithMemTypeBegin() instead. 
1402 .seealso: `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()` 1403 @*/ 1404 PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1405 { 1406 PetscMemType rootmtype,leafmtype; 1407 1408 PetscFunctionBegin; 1409 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1410 PetscCall(PetscSFSetUp(sf)); 1411 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0)); 1412 PetscCall(PetscGetMemType(rootdata,&rootmtype)); 1413 PetscCall(PetscGetMemType(leafdata,&leafmtype)); 1414 PetscCall((*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op)); 1415 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0)); 1416 PetscFunctionReturn(0); 1417 } 1418 1419 /*@C 1420 PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd() 1421 1422 Collective on PetscSF 1423 1424 Input Parameters: 1425 + sf - star forest on which to communicate 1426 . unit - data type associated with each node 1427 . rootmtype - memory type of rootdata 1428 . rootdata - buffer to broadcast 1429 . leafmtype - memory type of leafdata 1430 - op - operation to use for reduction 1431 1432 Output Parameter: 1433 . 
leafdata - buffer to be reduced with values from each leaf's respective root 1434 1435 Level: intermediate 1436 1437 .seealso: `PetscSFBcastEnd()`, `PetscSFBcastBegin()` 1438 @*/ 1439 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 1440 { 1441 PetscFunctionBegin; 1442 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1443 PetscCall(PetscSFSetUp(sf)); 1444 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0)); 1445 PetscCall((*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op)); 1446 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0)); 1447 PetscFunctionReturn(0); 1448 } 1449 1450 /*@C 1451 PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin() 1452 1453 Collective 1454 1455 Input Parameters: 1456 + sf - star forest 1457 . unit - data type 1458 . rootdata - buffer to broadcast 1459 - op - operation to use for reduction 1460 1461 Output Parameter: 1462 . leafdata - buffer to be reduced with values from each leaf's respective root 1463 1464 Level: intermediate 1465 1466 .seealso: `PetscSFSetGraph()`, `PetscSFReduceEnd()` 1467 @*/ 1468 PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1469 { 1470 PetscFunctionBegin; 1471 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1472 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0)); 1473 PetscCall((*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op)); 1474 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0)); 1475 PetscFunctionReturn(0); 1476 } 1477 1478 /*@C 1479 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 1480 1481 Collective 1482 1483 Input Parameters: 1484 + sf - star forest 1485 . unit - data type 1486 . 
leafdata - values to reduce 1487 - op - reduction operation 1488 1489 Output Parameter: 1490 . rootdata - result of reduction of values from all leaves of each root 1491 1492 Level: intermediate 1493 1494 Notes: 1495 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1496 are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1497 use PetscSFReduceWithMemTypeBegin() instead. 1498 1499 .seealso: `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()` 1500 @*/ 1501 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1502 { 1503 PetscMemType rootmtype,leafmtype; 1504 1505 PetscFunctionBegin; 1506 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1507 PetscCall(PetscSFSetUp(sf)); 1508 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0)); 1509 PetscCall(PetscGetMemType(rootdata,&rootmtype)); 1510 PetscCall(PetscGetMemType(leafdata,&leafmtype)); 1511 PetscCall((sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op)); 1512 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0)); 1513 PetscFunctionReturn(0); 1514 } 1515 1516 /*@C 1517 PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd() 1518 1519 Collective 1520 1521 Input Parameters: 1522 + sf - star forest 1523 . unit - data type 1524 . leafmtype - memory type of leafdata 1525 . leafdata - values to reduce 1526 . rootmtype - memory type of rootdata 1527 - op - reduction operation 1528 1529 Output Parameter: 1530 . 
rootdata - result of reduction of values from all leaves of each root 1531 1532 Level: intermediate 1533 1534 .seealso: `PetscSFBcastBegin()`, `PetscSFReduceBegin()` 1535 @*/ 1536 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 1537 { 1538 PetscFunctionBegin; 1539 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1540 PetscCall(PetscSFSetUp(sf)); 1541 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0)); 1542 PetscCall((sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op)); 1543 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0)); 1544 PetscFunctionReturn(0); 1545 } 1546 1547 /*@C 1548 PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin() 1549 1550 Collective 1551 1552 Input Parameters: 1553 + sf - star forest 1554 . unit - data type 1555 . leafdata - values to reduce 1556 - op - reduction operation 1557 1558 Output Parameter: 1559 . rootdata - result of reduction of values from all leaves of each root 1560 1561 Level: intermediate 1562 1563 .seealso: `PetscSFSetGraph()`, `PetscSFBcastEnd()` 1564 @*/ 1565 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1566 { 1567 PetscFunctionBegin; 1568 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1569 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0)); 1570 PetscCall((*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op)); 1571 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0)); 1572 PetscFunctionReturn(0); 1573 } 1574 1575 /*@C 1576 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd() 1577 1578 Collective 1579 1580 Input Parameters: 1581 + sf - star forest 1582 . 
unit - data type 1583 . leafdata - leaf values to use in reduction 1584 - op - operation to use for reduction 1585 1586 Output Parameters: 1587 + rootdata - root values to be updated, input state is seen by first process to perform an update 1588 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1589 1590 Level: advanced 1591 1592 Note: 1593 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1594 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1595 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1596 integers. 1597 1598 .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()` 1599 @*/ 1600 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1601 { 1602 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1603 1604 PetscFunctionBegin; 1605 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1606 PetscCall(PetscSFSetUp(sf)); 1607 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0)); 1608 PetscCall(PetscGetMemType(rootdata,&rootmtype)); 1609 PetscCall(PetscGetMemType(leafdata,&leafmtype)); 1610 PetscCall(PetscGetMemType(leafupdate,&leafupdatemtype)); 1611 PetscCheck(leafmtype == leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1612 PetscCall((*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op)); 1613 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0)); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 /*@C 1618 PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with 
PetscSFFetchAndOpEnd() 1619 1620 Collective 1621 1622 Input Parameters: 1623 + sf - star forest 1624 . unit - data type 1625 . rootmtype - memory type of rootdata 1626 . leafmtype - memory type of leafdata 1627 . leafdata - leaf values to use in reduction 1628 . leafupdatemtype - memory type of leafupdate 1629 - op - operation to use for reduction 1630 1631 Output Parameters: 1632 + rootdata - root values to be updated, input state is seen by first process to perform an update 1633 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1634 1635 Level: advanced 1636 1637 Note: See PetscSFFetchAndOpBegin() for more details. 1638 1639 .seealso: `PetscSFFetchAndOpBegin(),PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()` 1640 @*/ 1641 PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscMemType leafupdatemtype,void *leafupdate,MPI_Op op) 1642 { 1643 PetscFunctionBegin; 1644 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1645 PetscCall(PetscSFSetUp(sf)); 1646 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0)); 1647 PetscCheck(leafmtype == leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1648 PetscCall((*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op)); 1649 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0)); 1650 PetscFunctionReturn(0); 1651 } 1652 1653 /*@C 1654 PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value 1655 1656 Collective 1657 1658 Input Parameters: 1659 + sf - star forest 1660 . unit - data type 1661 . 
leafdata - leaf values to use in reduction
- op - operation to use for reduction

   Output Parameters:
+ rootdata - root values to be updated, input state is seen by first process to perform an update
- leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0));
  /* Complete the communication started by the matching PetscSFFetchAndOpBegin() via the type-specific backend */
  PetscCall((*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op));
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Parameter:
. sf - star forest

   Output Parameter:
. degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: `PetscSFGatherBegin()`
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    /* sf->degree doubles as the "a degree computation is in flight" flag; see PetscSFComputeDegreeEnd() */
    PetscCheck(!sf->degree,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf-sf->minleaf+1;
    PetscCall(PetscMalloc1(nroots,&sf->degree));
    PetscCall(PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* Every leaf contributes 1; summing leaf contributions at the roots yields each root's degree.
       degreetmp is offset by minleaf so the reduce can index it by global leaf id. */
    PetscCall(PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM));
  }
  /* The result is only available after PetscSFComputeDegreeEnd() */
  *degree = NULL;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Parameter:
. sf - star forest

   Output Parameter:
. degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: `PetscSFComputeDegreeBegin()`, `PetscSFGatherBegin()`
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    /* degreetmp is allocated by PetscSFComputeDegreeBegin(); its absence means Begin was never called */
    PetscCheck(sf->degreetmp,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    PetscCall(PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM));
    PetscCall(PetscFree(sf->degreetmp));
    /* Cache the result; subsequent Begin/End pairs return it without communication */
    sf->degreeknown = PETSC_TRUE;
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
   Each multi-root is assigned index of the corresponding original root.

   Collective

   Input Parameters:
+ sf - star forest
- degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

   Output Parameters:
+ nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
- multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

   Level: developer

   Notes:
   The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1772 1773 .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()` 1774 @*/ 1775 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1776 { 1777 PetscSF msf; 1778 PetscInt i, j, k, nroots, nmroots; 1779 1780 PetscFunctionBegin; 1781 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1782 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL)); 1783 if (nroots) PetscValidIntPointer(degree,2); 1784 if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3); 1785 PetscValidPointer(multiRootsOrigNumbering,4); 1786 PetscCall(PetscSFGetMultiSF(sf,&msf)); 1787 PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL)); 1788 PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering)); 1789 for (i=0,j=0,k=0; i<nroots; i++) { 1790 if (!degree[i]) continue; 1791 for (j=0; j<degree[i]; j++,k++) { 1792 (*multiRootsOrigNumbering)[k] = i; 1793 } 1794 } 1795 PetscCheck(k == nmroots,PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail"); 1796 if (nMultiRoots) *nMultiRoots = nmroots; 1797 PetscFunctionReturn(0); 1798 } 1799 1800 /*@C 1801 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 1802 1803 Collective 1804 1805 Input Parameters: 1806 + sf - star forest 1807 . unit - data type 1808 - leafdata - leaf data to gather to roots 1809 1810 Output Parameter: 1811 . 
multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1812 1813 Level: intermediate 1814 1815 .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()` 1816 @*/ 1817 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1818 { 1819 PetscSF multi = NULL; 1820 1821 PetscFunctionBegin; 1822 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1823 PetscCall(PetscSFSetUp(sf)); 1824 PetscCall(PetscSFGetMultiSF(sf,&multi)); 1825 PetscCall(PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE)); 1826 PetscFunctionReturn(0); 1827 } 1828 1829 /*@C 1830 PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin() 1831 1832 Collective 1833 1834 Input Parameters: 1835 + sf - star forest 1836 . unit - data type 1837 - leafdata - leaf data to gather to roots 1838 1839 Output Parameter: 1840 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1841 1842 Level: intermediate 1843 1844 .seealso: `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()` 1845 @*/ 1846 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1847 { 1848 PetscSF multi = NULL; 1849 1850 PetscFunctionBegin; 1851 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1852 PetscCall(PetscSFGetMultiSF(sf,&multi)); 1853 PetscCall(PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE)); 1854 PetscFunctionReturn(0); 1855 } 1856 1857 /*@C 1858 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd() 1859 1860 Collective 1861 1862 Input Parameters: 1863 + sf - star forest 1864 . unit - data type 1865 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1866 1867 Output Parameter: 1868 . 
leafdata - leaf data to be update with personal data from each respective root 1869 1870 Level: intermediate 1871 1872 .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()` 1873 @*/ 1874 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1875 { 1876 PetscSF multi = NULL; 1877 1878 PetscFunctionBegin; 1879 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1880 PetscCall(PetscSFSetUp(sf)); 1881 PetscCall(PetscSFGetMultiSF(sf,&multi)); 1882 PetscCall(PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE)); 1883 PetscFunctionReturn(0); 1884 } 1885 1886 /*@C 1887 PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin() 1888 1889 Collective 1890 1891 Input Parameters: 1892 + sf - star forest 1893 . unit - data type 1894 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1895 1896 Output Parameter: 1897 . leafdata - leaf data to be update with personal data from each respective root 1898 1899 Level: intermediate 1900 1901 .seealso: `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()` 1902 @*/ 1903 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1904 { 1905 PetscSF multi = NULL; 1906 1907 PetscFunctionBegin; 1908 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1909 PetscCall(PetscSFGetMultiSF(sf,&multi)); 1910 PetscCall(PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE)); 1911 PetscFunctionReturn(0); 1912 } 1913 1914 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 1915 { 1916 PetscInt i, n, nleaves; 1917 const PetscInt *ilocal = NULL; 1918 PetscHSetI seen; 1919 1920 PetscFunctionBegin; 1921 if (PetscDefined(USE_DEBUG)) { 1922 PetscCall(PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL)); 1923 PetscCall(PetscHSetICreate(&seen)); 1924 for (i = 0; i < nleaves; i++) { 1925 const PetscInt leaf = ilocal ? 
ilocal[i] : i; 1926 PetscCall(PetscHSetIAdd(seen,leaf)); 1927 } 1928 PetscCall(PetscHSetIGetSize(seen,&n)); 1929 PetscCheck(n == nleaves,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique"); 1930 PetscCall(PetscHSetIDestroy(&seen)); 1931 } 1932 PetscFunctionReturn(0); 1933 } 1934 1935 /*@ 1936 PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view 1937 1938 Input Parameters: 1939 + sfA - The first PetscSF 1940 - sfB - The second PetscSF 1941 1942 Output Parameters: 1943 . sfBA - The composite SF 1944 1945 Level: developer 1946 1947 Notes: 1948 Currently, the two SFs must be defined on congruent communicators and they must be true star 1949 forests, i.e. the same leaf is not connected with different roots. 1950 1951 sfA's leaf space and sfB's root space might be partially overlapped. The composition builds 1952 a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected 1953 nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a 1954 Bcast on sfA, then a Bcast on sfB, on connected nodes. 

.seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA));
  PetscCall(PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB));
  /* If sfA's leaves are not contiguous, scatter its remote points into an array indexed by
     sfB's root numbering; rank -1 marks sfB roots with no incoming edge from sfA */
  if (localPointsA) {
    PetscCall(PetscMalloc1(numRootsB,&reorderedRemotePointsA));
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* leaf of sfA outside sfB's root space cannot connect */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  /* Broadcast sfA's root locations through sfB so every sfB leaf learns the sfA root above it */
  PetscCall(PetscSFGetLeafRange(sfB,&minleaf,&maxleaf));
  PetscCall(PetscMalloc1(maxleaf-minleaf+1,&leafdataB));
  PetscCall(PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE));
  PetscCall(PetscFree(reorderedRemotePointsA));

  /* First pass: count connected leaves and detect whether the result has contiguous leaves */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    /* All leaves connected and contiguous: leafdataB can be handed to the new SF as-is */
    localPointsBA  = NULL;
    remotePointsBA = leafdataB;
  } else {
    /* Second pass: compact the connected leaves into freshly allocated arrays */
    PetscCall(PetscMalloc1(numLeavesBA,&localPointsBA));
    PetscCall(PetscMalloc1(numLeavesBA,&remotePointsBA));
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    PetscCall(PetscFree(leafdataB));
  }
  /* Ownership of localPointsBA/remotePointsBA (or leafdataB in the dense case) transfers to sfBA */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  PetscCall(PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER));
  PetscFunctionReturn(0);
}

/*@
   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

   Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

   Output Parameters:
. sfBA - The composite SF.

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
   a Reduce on sfB, on connected roots.

.seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op            op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool         iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  PetscCall(PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin));
  if (iswin) op = MPI_REPLACE;
#endif

  /* Lay out sfA's remote points over sfB's leaf range; rank -1 marks slots with no sfA edge */
  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA));
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; /* sfA leaf outside sfB's leaf range cannot connect */
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* localPointsA == NULL means sfA's leaves are contiguous 0..numLeavesA-1 */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  PetscCall(PetscMalloc1(numRootsB,&localPointsBA));
  PetscCall(PetscMalloc1(numRootsB,&remotePointsBA));
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Reduce through sfB so each sfB root learns the sfA root above its (single) leaf */
  PetscCall(PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op));
  PetscCall(PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op));
  PetscCall(PetscFree(reorderedRemotePointsA));
  /* Compact in place: keep only sfB roots that received a connection (rank != -1) */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  /* Ownership of localPointsBA/remotePointsBA transfers to sfBA */
  PetscCall(PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER));
  PetscFunctionReturn(0);
}

/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
. out - The local PetscSF
 */
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm           comm;
  PetscMPIInt        myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode       *liremote;
  PetscSF            lsf;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->CreateLocalSF) {
    /* The SF type provides an optimized implementation */
    PetscCall((*sf->ops->CreateLocalSF)(sf,out));
  } else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    PetscCall(PetscObjectGetComm((PetscObject)sf,&comm));
    PetscCallMPI(MPI_Comm_rank(comm,&myrank));

    /* Find out local edges and build a local SF */
    PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote));
    /* First pass: count edges whose root lives on this rank */
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    PetscCall(PetscMalloc1(lnleaves,&lilocal));
    PetscCall(PetscMalloc1(lnleaves,&liremote));

    /* Second pass: copy those edges, remapping the remote rank to 0 (self) */
    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    PetscCall(PetscSFCreate(PETSC_COMM_SELF,&lsf));
    PetscCall(PetscSFSetFromOptions(lsf));
    /* Ownership of lilocal/liremote transfers to lsf */
    PetscCall(PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(lsf));
    *out = lsf;
  }
  PetscFunctionReturn(0);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscMemType rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscSFSetUp(sf));
  /* Logged under the regular Bcast event; there is no dedicated event for BcastToZero */
  PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0));
  PetscCall(PetscGetMemType(rootdata,&rootmtype));
  PetscCall(PetscGetMemType(leafdata,&leafmtype));
  if (sf->ops->BcastToZero) {
    PetscCall((*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata));
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0));
  PetscFunctionReturn(0);
}

/*@
   PetscSFConcatenate - concatenate multiple SFs into one

   Input Parameters:
+ comm - the communicator
. nsfs - the number of input PetscSF
. sfs - the array of input PetscSF
. shareRoots - the flag whether roots of input PetscSFs are taken as shared (PETSC_TRUE), or separate and concatenated (PETSC_FALSE)
- leafOffsets - the array of local leaf offsets, one for each input PetscSF, or NULL for contiguous storage

   Output Parameters:
. newsf - The resulting PetscSF

   Level: developer

   Notes:
   The communicator of all SFs in sfs must be comm.

   The offsets in leafOffsets are added to the original leaf indices.

   If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well.
   In this case, NULL is also passed as ilocal to the resulting SF.

   If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs.
   In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs).

.seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscBool shareRoots, PetscInt leafOffsets[], PetscSF *newsf)
{
  PetscInt     i, s, nLeaves, nRoots;
  PetscInt    *leafArrayOffsets;
  PetscInt    *ilocal_new;
  PetscSFNode *iremote_new;
  PetscInt    *rootOffsets;
  PetscBool    all_ilocal_null = PETSC_FALSE;
  PetscMPIInt  rank;

  PetscFunctionBegin;
  {
    PetscSF dummy; /* just to have a PetscObject on comm for input validation */

    PetscCall(PetscSFCreate(comm,&dummy));
    PetscValidLogicalCollectiveInt(dummy,nsfs,2);
    PetscValidPointer(sfs,3);
    for (i=0; i<nsfs; i++) {
      PetscValidHeaderSpecific(sfs[i],PETSCSF_CLASSID,3);
      PetscCheckSameComm(dummy,1,sfs[i],3);
    }
    PetscValidLogicalCollectiveBool(dummy,shareRoots,4);
    if (leafOffsets) PetscValidIntPointer(leafOffsets,5);
    PetscValidPointer(newsf,6);
    PetscCall(PetscSFDestroy(&dummy));
  }
  /* No inputs: return an empty SF */
  if (!nsfs) {
    PetscCall(PetscSFCreate(comm, newsf));
    PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscFunctionReturn(0);
  }
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* Root offsets: all zero when roots are shared, prefix sums of root counts otherwise */
  PetscCall(PetscCalloc1(nsfs+1, &rootOffsets));
  if (shareRoots) {
    PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
    if (PetscDefined(USE_DEBUG)) {
      /* Shared roots require every input SF to have the same local root count */
      for (s=1; s<nsfs; s++) {
        PetscInt nr;

        PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
        PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "shareRoots = PETSC_TRUE but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", s, nr, nRoots);
      }
    }
  } else {
    for (s=0; s<nsfs; s++) {
      PetscInt nr;

      PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
      rootOffsets[s+1] = rootOffsets[s] + nr;
    }
    nRoots = rootOffsets[nsfs];
  }

  /* Calculate leaf array offsets and automatic root offsets */
  PetscCall(PetscMalloc1(nsfs+1,&leafArrayOffsets));
  leafArrayOffsets[0] = 0;
  for (s=0; s<nsfs; s++) {
    PetscInt nl;

    PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
    leafArrayOffsets[s+1] = leafArrayOffsets[s] + nl;
  }
  nLeaves = leafArrayOffsets[nsfs];

  /* leafOffsets == NULL is only legal when every input SF has contiguous leaves */
  if (!leafOffsets) {
    all_ilocal_null = PETSC_TRUE;
    for (s=0; s<nsfs; s++) {
      const PetscInt *ilocal;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
      if (ilocal) {
        all_ilocal_null = PETSC_FALSE;
        break;
      }
    }
    PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
  }

  /* Renumber and concatenate local leaves */
  ilocal_new = NULL;
  if (!all_ilocal_null) {
    /* leafOffsets is non-NULL here: the check above errors out otherwise */
    PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
    for (i = 0; i<nLeaves; i++) ilocal_new[i] = -1;
    for (s = 0; s<nsfs; s++) {
      const PetscInt *ilocal;
      PetscInt       *ilocal_l = &ilocal_new[leafArrayOffsets[s]];
      PetscInt        i, nleaves_l;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
      for (i=0; i<nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
    }
  }

  /* Renumber and concatenate remote roots */
  PetscCall(PetscMalloc1(nLeaves, &iremote_new));
  for (i = 0; i < nLeaves; i++) {
    iremote_new[i].rank = -1;
    iremote_new[i].index = -1;
  }
  for (s = 0; s<nsfs; s++) {
    PetscInt           i, nl, nr;
    PetscSF            tmp_sf;
    const PetscSFNode *iremote;
    PetscSFNode       *tmp_rootdata;
    PetscSFNode       *tmp_leafdata = &iremote_new[leafArrayOffsets[s]];

    PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
    PetscCall(PetscSFCreate(comm, &tmp_sf));
    /* create helper SF with contiguous leaves */
    PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode*) iremote, PETSC_COPY_VALUES));
    PetscCall(PetscSFSetUp(tmp_sf));
    /* Each root broadcasts its offset-shifted (rank, index) pair to its leaves */
    PetscCall(PetscMalloc1(nr, &tmp_rootdata));
    for (i = 0; i < nr; i++) {
      tmp_rootdata[i].index = i + rootOffsets[s];
      tmp_rootdata[i].rank = (PetscInt) rank;
    }
    PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(  tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
    PetscCall(PetscSFDestroy(&tmp_sf));
    PetscCall(PetscFree(tmp_rootdata));
  }

  /* Build the new SF; ownership of ilocal_new/iremote_new transfers to it */
  PetscCall(PetscSFCreate(comm, newsf));
  PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetUp(*newsf));
  PetscCall(PetscFree(rootOffsets));
  PetscCall(PetscFree(leafArrayOffsets));
  PetscFunctionReturn(0);
}