1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/hashseti.h> 3 #include <petsc/private/viewerimpl.h> 4 #include <petscctable.h> 5 6 #if defined(PETSC_HAVE_CUDA) 7 #include <cuda_runtime.h> 8 #endif 9 10 #if defined(PETSC_HAVE_HIP) 11 #include <hip/hip_runtime.h> 12 #endif 13 14 #if defined(PETSC_CLANG_STATIC_ANALYZER) 15 void PetscSFCheckGraphSet(PetscSF,int); 16 #else 17 #if defined(PETSC_USE_DEBUG) 18 # define PetscSFCheckGraphSet(sf,arg) PetscCheck((sf)->graphset,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); 19 #else 20 # define PetscSFCheckGraphSet(sf,arg) do {} while (0) 21 #endif 22 #endif 23 24 const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL}; 25 26 /*@ 27 PetscSFCreate - create a star forest communication context 28 29 Collective 30 31 Input Parameter: 32 . comm - communicator on which the star forest will operate 33 34 Output Parameter: 35 . sf - new star forest context 36 37 Options Database Keys: 38 + -sf_type basic -Use MPI persistent Isend/Irecv for communication (Default) 39 . -sf_type window -Use MPI-3 one-sided window for communication 40 - -sf_type neighbor -Use MPI-3 neighborhood collectives for communication 41 42 Level: intermediate 43 44 Notes: 45 When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv, 46 MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special 47 SFs are optimized and they have better performance than general SFs. 
48 49 .seealso: `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()` 50 @*/ 51 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf) 52 { 53 PetscSF b; 54 55 PetscFunctionBegin; 56 PetscValidPointer(sf,2); 57 PetscCall(PetscSFInitializePackage()); 58 59 PetscCall(PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView)); 60 61 b->nroots = -1; 62 b->nleaves = -1; 63 b->minleaf = PETSC_MAX_INT; 64 b->maxleaf = PETSC_MIN_INT; 65 b->nranks = -1; 66 b->rankorder = PETSC_TRUE; 67 b->ingroup = MPI_GROUP_NULL; 68 b->outgroup = MPI_GROUP_NULL; 69 b->graphset = PETSC_FALSE; 70 #if defined(PETSC_HAVE_DEVICE) 71 b->use_gpu_aware_mpi = use_gpu_aware_mpi; 72 b->use_stream_aware_mpi = PETSC_FALSE; 73 b->unknown_input_stream= PETSC_FALSE; 74 #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 75 b->backend = PETSCSF_BACKEND_KOKKOS; 76 #elif defined(PETSC_HAVE_CUDA) 77 b->backend = PETSCSF_BACKEND_CUDA; 78 #elif defined(PETSC_HAVE_HIP) 79 b->backend = PETSCSF_BACKEND_HIP; 80 #endif 81 82 #if defined(PETSC_HAVE_NVSHMEM) 83 b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */ 84 b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */ 85 PetscCall(PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL)); 86 PetscCall(PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL)); 87 #endif 88 #endif 89 b->vscat.from_n = -1; 90 b->vscat.to_n = -1; 91 b->vscat.unit = MPIU_SCALAR; 92 *sf = b; 93 PetscFunctionReturn(0); 94 } 95 96 /*@ 97 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 98 99 Collective 100 101 Input Parameter: 102 . 
sf - star forest 103 104 Level: advanced 105 106 .seealso: `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()` 107 @*/ 108 PetscErrorCode PetscSFReset(PetscSF sf) 109 { 110 PetscFunctionBegin; 111 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 112 PetscTryTypeMethod(sf,Reset); 113 sf->nroots = -1; 114 sf->nleaves = -1; 115 sf->minleaf = PETSC_MAX_INT; 116 sf->maxleaf = PETSC_MIN_INT; 117 sf->mine = NULL; 118 sf->remote = NULL; 119 sf->graphset = PETSC_FALSE; 120 PetscCall(PetscFree(sf->mine_alloc)); 121 PetscCall(PetscFree(sf->remote_alloc)); 122 sf->nranks = -1; 123 PetscCall(PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote)); 124 sf->degreeknown = PETSC_FALSE; 125 PetscCall(PetscFree(sf->degree)); 126 if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup)); 127 if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup)); 128 if (sf->multi) sf->multi->multi = NULL; 129 PetscCall(PetscSFDestroy(&sf->multi)); 130 PetscCall(PetscLayoutDestroy(&sf->map)); 131 132 #if defined(PETSC_HAVE_DEVICE) 133 for (PetscInt i=0; i<2; i++) PetscCall(PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i])); 134 #endif 135 136 sf->setupcalled = PETSC_FALSE; 137 PetscFunctionReturn(0); 138 } 139 140 /*@C 141 PetscSFSetType - Set the PetscSF communication implementation 142 143 Collective on PetscSF 144 145 Input Parameters: 146 + sf - the PetscSF context 147 - type - a known method 148 149 Options Database Key: 150 . 
-sf_type <type> - Sets the method; use -help for a list 151 of available methods (for instance, window, basic, neighbor) 152 153 Notes: 154 See "include/petscsf.h" for available methods (for instance) 155 + PETSCSFWINDOW - MPI-2/3 one-sided 156 - PETSCSFBASIC - basic implementation using MPI-1 two-sided 157 158 Level: intermediate 159 160 .seealso: `PetscSFType`, `PetscSFCreate()` 161 @*/ 162 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type) 163 { 164 PetscBool match; 165 PetscErrorCode (*r)(PetscSF); 166 167 PetscFunctionBegin; 168 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 169 PetscValidCharPointer(type,2); 170 171 PetscCall(PetscObjectTypeCompare((PetscObject)sf,type,&match)); 172 if (match) PetscFunctionReturn(0); 173 174 PetscCall(PetscFunctionListFind(PetscSFList,type,&r)); 175 PetscCheck(r,PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type); 176 /* Destroy the previous PetscSF implementation context */ 177 PetscTryTypeMethod(sf,Destroy); 178 PetscCall(PetscMemzero(sf->ops,sizeof(*sf->ops))); 179 PetscCall(PetscObjectChangeTypeName((PetscObject)sf,type)); 180 PetscCall((*r)(sf)); 181 PetscFunctionReturn(0); 182 } 183 184 /*@C 185 PetscSFGetType - Get the PetscSF communication implementation 186 187 Not Collective 188 189 Input Parameter: 190 . sf - the PetscSF context 191 192 Output Parameter: 193 . type - the PetscSF type name 194 195 Level: intermediate 196 197 .seealso: `PetscSFSetType()`, `PetscSFCreate()` 198 @*/ 199 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 200 { 201 PetscFunctionBegin; 202 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1); 203 PetscValidPointer(type,2); 204 *type = ((PetscObject)sf)->type_name; 205 PetscFunctionReturn(0); 206 } 207 208 /*@C 209 PetscSFDestroy - destroy star forest 210 211 Collective 212 213 Input Parameter: 214 . 
sf - address of star forest 215 216 Level: intermediate 217 218 .seealso: `PetscSFCreate()`, `PetscSFReset()` 219 @*/ 220 PetscErrorCode PetscSFDestroy(PetscSF *sf) 221 { 222 PetscFunctionBegin; 223 if (!*sf) PetscFunctionReturn(0); 224 PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1); 225 if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);} 226 PetscCall(PetscSFReset(*sf)); 227 PetscTryTypeMethod((*sf),Destroy); 228 PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf)); 229 if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit)); 230 PetscCall(PetscHeaderDestroy(sf)); 231 PetscFunctionReturn(0); 232 } 233 234 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 235 { 236 PetscInt i, nleaves; 237 PetscMPIInt size; 238 const PetscInt *ilocal; 239 const PetscSFNode *iremote; 240 241 PetscFunctionBegin; 242 if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0); 243 PetscCall(PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote)); 244 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size)); 245 for (i = 0; i < nleaves; i++) { 246 const PetscInt rank = iremote[i].rank; 247 const PetscInt remote = iremote[i].index; 248 const PetscInt leaf = ilocal ? ilocal[i] : i; 249 PetscCheck(rank >= 0 && rank < size,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)",rank,i,size); 250 PetscCheck(remote >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0",remote,i); 251 PetscCheck(leaf >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0",leaf,i); 252 } 253 PetscFunctionReturn(0); 254 } 255 256 /*@ 257 PetscSFSetUp - set up communication structures 258 259 Collective 260 261 Input Parameter: 262 . 
sf - star forest communication object 263 264 Level: beginner 265 266 .seealso: `PetscSFSetFromOptions()`, `PetscSFSetType()` 267 @*/ 268 PetscErrorCode PetscSFSetUp(PetscSF sf) 269 { 270 PetscFunctionBegin; 271 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 272 PetscSFCheckGraphSet(sf,1); 273 if (sf->setupcalled) PetscFunctionReturn(0); 274 PetscCall(PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0)); 275 PetscCall(PetscSFCheckGraphValid_Private(sf)); 276 if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf,PETSCSFBASIC)); /* Zero all sf->ops */ 277 PetscTryTypeMethod(sf,SetUp); 278 #if defined(PETSC_HAVE_CUDA) 279 if (sf->backend == PETSCSF_BACKEND_CUDA) { 280 sf->ops->Malloc = PetscSFMalloc_CUDA; 281 sf->ops->Free = PetscSFFree_CUDA; 282 } 283 #endif 284 #if defined(PETSC_HAVE_HIP) 285 if (sf->backend == PETSCSF_BACKEND_HIP) { 286 sf->ops->Malloc = PetscSFMalloc_HIP; 287 sf->ops->Free = PetscSFFree_HIP; 288 } 289 #endif 290 291 # 292 #if defined(PETSC_HAVE_KOKKOS) 293 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 294 sf->ops->Malloc = PetscSFMalloc_Kokkos; 295 sf->ops->Free = PetscSFFree_Kokkos; 296 } 297 #endif 298 PetscCall(PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0)); 299 sf->setupcalled = PETSC_TRUE; 300 PetscFunctionReturn(0); 301 } 302 303 /*@ 304 PetscSFSetFromOptions - set PetscSF options using the options database 305 306 Logically Collective 307 308 Input Parameter: 309 . sf - star forest 310 311 Options Database Keys: 312 + -sf_type - implementation type, see PetscSFSetType() 313 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 314 . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also 315 use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true). 316 If true, this option only works with -use_gpu_aware_mpi 1. 317 . 
-sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false). 318 If true, this option only works with -use_gpu_aware_mpi 1. 319 320 - -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos. 321 On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices, 322 the only available is kokkos. 323 324 Level: intermediate 325 @*/ 326 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 327 { 328 PetscSFType deft; 329 char type[256]; 330 PetscBool flg; 331 332 PetscFunctionBegin; 333 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 334 PetscObjectOptionsBegin((PetscObject)sf); 335 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 336 PetscCall(PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg)); 337 PetscCall(PetscSFSetType(sf,flg ? 
type : deft)); 338 PetscCall(PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL)); 339 #if defined(PETSC_HAVE_DEVICE) 340 { 341 char backendstr[32] = {0}; 342 PetscBool isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set; 343 /* Change the defaults set in PetscSFCreate() with command line options */ 344 PetscCall(PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL)); 345 PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL)); 346 PetscCall(PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set)); 347 PetscCall(PetscStrcasecmp("cuda",backendstr,&isCuda)); 348 PetscCall(PetscStrcasecmp("kokkos",backendstr,&isKokkos)); 349 PetscCall(PetscStrcasecmp("hip",backendstr,&isHip)); 350 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 351 if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 352 else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 353 else if (isHip) sf->backend = PETSCSF_BACKEND_HIP; 354 else PetscCheck(!set,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr); 355 #elif defined(PETSC_HAVE_KOKKOS) 356 PetscCheck(!set || isKokkos,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. 
You can only choose kokkos", backendstr); 357 #endif 358 } 359 #endif 360 PetscTryTypeMethod(sf,SetFromOptions,PetscOptionsObject); 361 PetscOptionsEnd(); 362 PetscFunctionReturn(0); 363 } 364 365 /*@ 366 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 367 368 Logically Collective 369 370 Input Parameters: 371 + sf - star forest 372 - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic) 373 374 Level: advanced 375 376 .seealso: `PetscSFGatherBegin()`, `PetscSFScatterBegin()` 377 @*/ 378 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg) 379 { 380 PetscFunctionBegin; 381 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 382 PetscValidLogicalCollectiveBool(sf,flg,2); 383 PetscCheck(!sf->multi,PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 384 sf->rankorder = flg; 385 PetscFunctionReturn(0); 386 } 387 388 /*@C 389 PetscSFSetGraph - Set a parallel star forest 390 391 Collective 392 393 Input Parameters: 394 + sf - star forest 395 . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 396 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 397 . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced 398 during setup in debug mode) 399 . localmode - copy mode for ilocal 400 . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 401 during setup in debug mode) 402 - remotemode - copy mode for iremote 403 404 Level: intermediate 405 406 Notes: 407 Leaf indices in ilocal must be unique, otherwise an error occurs. 408 409 Input arrays ilocal and iremote follow the PetscCopyMode semantics. 
410 In particular, if localmode/remotemode is PETSC_OWN_POINTER or PETSC_USE_POINTER, 411 PETSc might modify the respective array; 412 if PETSC_USE_POINTER, the user must delete the array after PetscSFDestroy(). 413 Only if PETSC_COPY_VALUES is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed). 414 415 Fortran Notes: 416 In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode. 417 418 Developer Notes: 419 We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf. 420 This also allows to compare leaf sets of two SFs easily. 421 422 .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()` 423 @*/ 424 PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,PetscInt *ilocal,PetscCopyMode localmode,PetscSFNode *iremote,PetscCopyMode remotemode) 425 { 426 PetscBool unique, contiguous; 427 428 PetscFunctionBegin; 429 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 430 if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4); 431 if (nleaves > 0) PetscValidPointer(iremote,6); 432 PetscCheck(nroots >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %" PetscInt_FMT ", cannot be negative",nroots); 433 PetscCheck(nleaves >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %" PetscInt_FMT ", cannot be negative",nleaves); 434 /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast 435 * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */ 436 PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong localmode %d",localmode); 437 PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong remotemode %d",remotemode); 438 439 if (sf->nroots >= 0) { /* Reset only if graph already set */ 440 PetscCall(PetscSFReset(sf)); 441 } 
442 443 PetscCall(PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0)); 444 445 sf->nroots = nroots; 446 sf->nleaves = nleaves; 447 448 if (localmode == PETSC_COPY_VALUES && ilocal) { 449 PetscInt *tlocal = NULL; 450 451 PetscCall(PetscMalloc1(nleaves,&tlocal)); 452 PetscCall(PetscArraycpy(tlocal,ilocal,nleaves)); 453 ilocal = tlocal; 454 } 455 if (remotemode == PETSC_COPY_VALUES) { 456 PetscSFNode *tremote = NULL; 457 458 PetscCall(PetscMalloc1(nleaves,&tremote)); 459 PetscCall(PetscArraycpy(tremote,iremote,nleaves)); 460 iremote = tremote; 461 } 462 463 if (nleaves && ilocal) { 464 PetscSFNode work; 465 466 PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work)); 467 PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique)); 468 unique = PetscNot(unique); 469 PetscCheck(sf->allow_multi_leaves || unique,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Input ilocal has duplicate entries which is not allowed for this PetscSF"); 470 sf->minleaf = ilocal[0]; 471 sf->maxleaf = ilocal[nleaves-1]; 472 contiguous = (PetscBool) (unique && ilocal[0] == 0 && ilocal[nleaves-1] == nleaves-1); 473 } else { 474 sf->minleaf = 0; 475 sf->maxleaf = nleaves - 1; 476 unique = PETSC_TRUE; 477 contiguous = PETSC_TRUE; 478 } 479 480 if (contiguous) { 481 if (localmode == PETSC_USE_POINTER) { 482 ilocal = NULL; 483 } else { 484 PetscCall(PetscFree(ilocal)); 485 } 486 } 487 sf->mine = ilocal; 488 if (localmode == PETSC_USE_POINTER) { 489 sf->mine_alloc = NULL; 490 } else { 491 sf->mine_alloc = ilocal; 492 } 493 sf->remote = iremote; 494 if (remotemode == PETSC_USE_POINTER) { 495 sf->remote_alloc = NULL; 496 } else { 497 sf->remote_alloc = iremote; 498 } 499 PetscCall(PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0)); 500 sf->graphset = PETSC_TRUE; 501 PetscFunctionReturn(0); 502 } 503 504 /*@ 505 PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern 506 507 Collective 508 509 Input Parameters: 510 + sf - The PetscSF 511 . 
map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL) 512 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL 513 514 Notes: 515 It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map. 516 n and N are local and global sizes of x respectively. 517 518 With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to 519 sequential vectors y on all ranks. 520 521 With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a 522 sequential vector y on rank 0. 523 524 In above cases, entries of x are roots and entries of y are leaves. 525 526 With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine 527 creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 528 of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does 529 not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data 530 items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines. 531 532 In this case, roots and leaves are symmetric. 
533 534 Level: intermediate 535 @*/ 536 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern) 537 { 538 MPI_Comm comm; 539 PetscInt n,N,res[2]; 540 PetscMPIInt rank,size; 541 PetscSFType type; 542 543 PetscFunctionBegin; 544 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 545 if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map,2); 546 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 547 PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL,comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %d",pattern); 548 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 549 PetscCallMPI(MPI_Comm_size(comm,&size)); 550 551 if (pattern == PETSCSF_PATTERN_ALLTOALL) { 552 type = PETSCSFALLTOALL; 553 PetscCall(PetscLayoutCreate(comm,&sf->map)); 554 PetscCall(PetscLayoutSetLocalSize(sf->map,size)); 555 PetscCall(PetscLayoutSetSize(sf->map,((PetscInt)size)*size)); 556 PetscCall(PetscLayoutSetUp(sf->map)); 557 } else { 558 PetscCall(PetscLayoutGetLocalSize(map,&n)); 559 PetscCall(PetscLayoutGetSize(map,&N)); 560 res[0] = n; 561 res[1] = -n; 562 /* Check if n are same over all ranks so that we can optimize it */ 563 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm)); 564 if (res[0] == -res[1]) { /* same n */ 565 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 566 } else { 567 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 568 } 569 PetscCall(PetscLayoutReference(map,&sf->map)); 570 } 571 PetscCall(PetscSFSetType(sf,type)); 572 573 sf->pattern = pattern; 574 sf->mine = NULL; /* Contiguous */ 575 576 /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 577 Also set other easy stuff. 
578 */ 579 if (pattern == PETSCSF_PATTERN_ALLGATHER) { 580 sf->nleaves = N; 581 sf->nroots = n; 582 sf->nranks = size; 583 sf->minleaf = 0; 584 sf->maxleaf = N - 1; 585 } else if (pattern == PETSCSF_PATTERN_GATHER) { 586 sf->nleaves = rank ? 0 : N; 587 sf->nroots = n; 588 sf->nranks = rank ? 0 : size; 589 sf->minleaf = 0; 590 sf->maxleaf = rank ? -1 : N - 1; 591 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 592 sf->nleaves = size; 593 sf->nroots = size; 594 sf->nranks = size; 595 sf->minleaf = 0; 596 sf->maxleaf = size - 1; 597 } 598 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 599 sf->graphset = PETSC_TRUE; 600 PetscFunctionReturn(0); 601 } 602 603 /*@ 604 PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map 605 606 Collective 607 608 Input Parameter: 609 . sf - star forest to invert 610 611 Output Parameter: 612 . isf - inverse of sf 613 614 Level: advanced 615 616 Notes: 617 All roots must have degree 1. 618 619 The local space may be a permutation, but cannot be sparse. 
620 621 .seealso: `PetscSFSetGraph()` 622 @*/ 623 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf) 624 { 625 PetscMPIInt rank; 626 PetscInt i,nroots,nleaves,maxlocal,count,*newilocal; 627 const PetscInt *ilocal; 628 PetscSFNode *roots,*leaves; 629 630 PetscFunctionBegin; 631 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 632 PetscSFCheckGraphSet(sf,1); 633 PetscValidPointer(isf,2); 634 635 PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL)); 636 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 637 638 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank)); 639 PetscCall(PetscMalloc2(nroots,&roots,maxlocal,&leaves)); 640 for (i=0; i<maxlocal; i++) { 641 leaves[i].rank = rank; 642 leaves[i].index = i; 643 } 644 for (i=0; i <nroots; i++) { 645 roots[i].rank = -1; 646 roots[i].index = -1; 647 } 648 PetscCall(PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE)); 649 PetscCall(PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE)); 650 651 /* Check whether our leaves are sparse */ 652 for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++; 653 if (count == nroots) newilocal = NULL; 654 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). 
*/ 655 PetscCall(PetscMalloc1(count,&newilocal)); 656 for (i=0,count=0; i<nroots; i++) { 657 if (roots[i].rank >= 0) { 658 newilocal[count] = i; 659 roots[count].rank = roots[i].rank; 660 roots[count].index = roots[i].index; 661 count++; 662 } 663 } 664 } 665 666 PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf)); 667 PetscCall(PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES)); 668 PetscCall(PetscFree2(roots,leaves)); 669 PetscFunctionReturn(0); 670 } 671 672 /*@ 673 PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph 674 675 Collective 676 677 Input Parameters: 678 + sf - communication object to duplicate 679 - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption) 680 681 Output Parameter: 682 . newsf - new communication object 683 684 Level: beginner 685 686 .seealso: `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()` 687 @*/ 688 PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf) 689 { 690 PetscSFType type; 691 MPI_Datatype dtype=MPIU_SCALAR; 692 693 PetscFunctionBegin; 694 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 695 PetscValidLogicalCollectiveEnum(sf,opt,2); 696 PetscValidPointer(newsf,3); 697 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf)); 698 PetscCall(PetscSFGetType(sf,&type)); 699 if (type) PetscCall(PetscSFSetType(*newsf,type)); 700 (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag ealier since PetscSFSetGraph() below checks on this flag */ 701 if (opt == PETSCSF_DUPLICATE_GRAPH) { 702 PetscSFCheckGraphSet(sf,1); 703 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 704 PetscInt nroots,nleaves; 705 const PetscInt *ilocal; 706 const PetscSFNode *iremote; 707 PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote)); 708 
PetscCall(PetscSFSetGraph(*newsf,nroots,nleaves,(PetscInt*)ilocal,PETSC_COPY_VALUES,(PetscSFNode*)iremote,PETSC_COPY_VALUES)); 709 } else { 710 PetscCall(PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern)); 711 } 712 } 713 /* Since oldtype is committed, so is newtype, according to MPI */ 714 if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit,&dtype)); 715 (*newsf)->vscat.bs = sf->vscat.bs; 716 (*newsf)->vscat.unit = dtype; 717 (*newsf)->vscat.to_n = sf->vscat.to_n; 718 (*newsf)->vscat.from_n = sf->vscat.from_n; 719 /* Do not copy lsf. Build it on demand since it is rarely used */ 720 721 #if defined(PETSC_HAVE_DEVICE) 722 (*newsf)->backend = sf->backend; 723 (*newsf)->unknown_input_stream= sf->unknown_input_stream; 724 (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi; 725 (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi; 726 #endif 727 PetscTryTypeMethod(sf,Duplicate,opt,*newsf); 728 /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */ 729 PetscFunctionReturn(0); 730 } 731 732 /*@C 733 PetscSFGetGraph - Get the graph specifying a parallel star forest 734 735 Not Collective 736 737 Input Parameter: 738 . sf - star forest 739 740 Output Parameters: 741 + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 742 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 743 . ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage) 744 - iremote - remote locations of root vertices for each leaf on the current process 745 746 Notes: 747 We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet 748 749 The returned ilocal/iremote might contain values in different order than the input ones in PetscSFSetGraph(), 750 see its manpage for details. 
751 752 Fortran Notes: 753 The returned iremote array is a copy and must be deallocated after use. Consequently, if you 754 want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array. 755 756 To check for a NULL ilocal use 757 $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then 758 759 Level: intermediate 760 761 .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()` 762 @*/ 763 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 764 { 765 PetscFunctionBegin; 766 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 767 if (sf->ops->GetGraph) { 768 PetscCall((sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote)); 769 } else { 770 if (nroots) *nroots = sf->nroots; 771 if (nleaves) *nleaves = sf->nleaves; 772 if (ilocal) *ilocal = sf->mine; 773 if (iremote) *iremote = sf->remote; 774 } 775 PetscFunctionReturn(0); 776 } 777 778 /*@ 779 PetscSFGetLeafRange - Get the active leaf ranges 780 781 Not Collective 782 783 Input Parameter: 784 . sf - star forest 785 786 Output Parameters: 787 + minleaf - minimum active leaf on this process. Return 0 if there are no leaves. 788 - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves. 789 790 Level: developer 791 792 .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 793 @*/ 794 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf) 795 { 796 PetscFunctionBegin; 797 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 798 PetscSFCheckGraphSet(sf,1); 799 if (minleaf) *minleaf = sf->minleaf; 800 if (maxleaf) *maxleaf = sf->maxleaf; 801 PetscFunctionReturn(0); 802 } 803 804 /*@C 805 PetscSFViewFromOptions - View from Options 806 807 Collective on PetscSF 808 809 Input Parameters: 810 + A - the star forest 811 . 
obj - Optional object
-  name - command line option

   Level: intermediate
.seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
  PetscCall(PetscObjectViewFromOptions((PetscObject)A,obj,name));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFView - view a star forest

   Collective

   Input Parameters:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: `PetscSFCreate()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer));
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Set up first so that rank/offset arrays printed below are populated */
  if (sf->graphset) PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer));
    PetscCall(PetscViewerASCIIPushTab(viewer));
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        PetscCall(PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n"));
        PetscCall(PetscViewerASCIIPopTab(viewer));
        PetscFunctionReturn(0);
      }
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
      /* Synchronized printing keeps per-rank output in rank order */
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n",rank,sf->nroots,sf->nleaves,sf->nranks));
      for (i=0; i<sf->nleaves; i++) {
        /* NULL sf->mine means the leaf space is contiguous, i.e. leaf i is index i */
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index));
      }
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerGetFormat(viewer,&format));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        /* Sort a copy of the ranks (with a permutation) so edges print in ascending rank order */
        PetscCall(PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm));
        PetscCall(PetscArraycpy(tmpranks,sf->ranks,sf->nranks));
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        PetscCall(PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank));
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %" PetscInt_FMT " edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]));
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n",rank,sf->rmine[j],sf->rremote[j]));
          }
        }
        PetscCall(PetscFree2(tmpranks,perm));
      }
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
    }
    PetscCall(PetscViewerASCIIPopTab(viewer));
  }
  /* Give the implementation a chance to print type-specific information */
  PetscTryTypeMethod(sf,View,viewer);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
.  roffset - offset in rmine/rremote for each rank (length nranks+1)
.
rmine - concatenated array holding local indices referencing each remote rank 908 - rremote - concatenated array holding remote indices referenced for each remote rank 909 910 Level: developer 911 912 .seealso: `PetscSFGetLeafRanks()` 913 @*/ 914 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote) 915 { 916 PetscFunctionBegin; 917 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 918 PetscCheck(sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 919 if (sf->ops->GetRootRanks) { 920 PetscCall((sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote)); 921 } else { 922 /* The generic implementation */ 923 if (nranks) *nranks = sf->nranks; 924 if (ranks) *ranks = sf->ranks; 925 if (roffset) *roffset = sf->roffset; 926 if (rmine) *rmine = sf->rmine; 927 if (rremote) *rremote = sf->rremote; 928 } 929 PetscFunctionReturn(0); 930 } 931 932 /*@C 933 PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 934 935 Not Collective 936 937 Input Parameter: 938 . sf - star forest 939 940 Output Parameters: 941 + niranks - number of leaf ranks referencing roots on this process 942 . iranks - array of ranks 943 . 
ioffset - offset in irootloc for each rank (length niranks+1) 944 - irootloc - concatenated array holding local indices of roots referenced by each leaf rank 945 946 Level: developer 947 948 .seealso: `PetscSFGetRootRanks()` 949 @*/ 950 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 951 { 952 PetscFunctionBegin; 953 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 954 PetscCheck(sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 955 if (sf->ops->GetLeafRanks) { 956 PetscCall((sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc)); 957 } else { 958 PetscSFType type; 959 PetscCall(PetscSFGetType(sf,&type)); 960 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 961 } 962 PetscFunctionReturn(0); 963 } 964 965 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) { 966 PetscInt i; 967 for (i=0; i<n; i++) { 968 if (needle == list[i]) return PETSC_TRUE; 969 } 970 return PETSC_FALSE; 971 } 972 973 /*@C 974 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations. 

   Collective

   Input Parameters:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: `PetscSFGetRootRanks()`
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size));
  /* Count leaves per remote rank with a hash table keyed by rank */
  PetscCall(PetscTableCreate(10,size,&table));
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank (PetscTable cannot store key 0) */
    PetscCall(PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES));
  }
  PetscCall(PetscTableGetCount(table,&sf->nranks));
  PetscCall(PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote));
  PetscCall(PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks));
  PetscCall(PetscTableGetHeadPosition(table,&pos));
  for (i=0; i<sf->nranks; i++) {
    PetscCall(PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]));
    ranks[i]--; /* Convert back to 0-based */
  }
  PetscCall(PetscTableDestroy(&table));

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate dgroup's ranks into the SF communicator's numbering */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group));
    PetscCallMPI(MPI_Group_size(dgroup,&groupsize));
    PetscCall(PetscMalloc1(groupsize,&dgroupranks));
    PetscCall(PetscMalloc1(groupsize,&groupranks));
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree(dgroupranks));
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished,
     using a two-pointer swap scheme */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank = ranks[i];
      tmpcount = rcount[i];
      ranks[i] = ranks[sf->ndranks];
      rcount[i] = rcount[sf->ndranks];
      ranks[sf->ndranks] = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  PetscCall(PetscFree(groupranks));
  /* Sort each partition separately so distinguished ranks stay in front */
  PetscCall(PetscSortIntWithArray(sf->ndranks,ranks,rcount));
  PetscCall(PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks));
  /* Build CSR-style offsets; rcount[] is then reused as a per-rank fill cursor */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    PetscCall(PetscMPIIntCast(ranks[i],sf->ranks+i));
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i] = 0;
  }
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      PetscCall(PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank));
      if (irank < 0) {
        PetscCall(PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank));
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    PetscCheck(irank >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %" PetscInt_FMT " in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  PetscCall(PetscFree2(rcount,ranks));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  MPI_Group group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  PetscCheck(sf->nranks >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  /* Groups are computed lazily and cached on the SF (sf->ingroup/sf->outgroup) */
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks */
    PetscCall(PetscMalloc1(sf->nranks,&remote));
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank = sf->ranks[i];
      remote[i].index = 0;
    }
    PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount));
    PetscCall(PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER));
    PetscCall(PetscSFComputeDegreeBegin(bgcount,&indegree));
    PetscCall(PetscSFComputeDegreeEnd(bgcount,&indegree));
    /* Enumerate the incoming ranks */
    PetscCall(PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    PetscCall(PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks));
    PetscCall(PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks));
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group));
    PetscCallMPI(MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree2(inranks,outranks));
    PetscCall(PetscSFDestroy(&bgcount));
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* Outgoing group is simply the (already computed) list of root ranks we reference */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group));
    PetscCallMPI(MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup));
    PetscCallMPI(MPI_Group_free(&group));
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Parameter:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Parameter:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.

.seealso: `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this?
*/
    PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi));
    *multi = sf->multi;
    sf->multi->multi = sf->multi; /* the multi-SF is its own multi-SF */
    PetscFunctionReturn(0);
  }
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    PetscCall(PetscSFComputeDegreeBegin(sf,&indegree));
    PetscCall(PetscSFComputeDegreeEnd(sf,&indegree));
    /* inoffset[] is the prefix sum of root degrees: the start of each root's slot range in the multi root space */
    PetscCall(PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset));
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Fetch-and-add of 1 hands each leaf a distinct slot index at its root */
    PetscCall(PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM));
    PetscCall(PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM));
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
      for (i=0; i<sf->nroots; i++) {
        PetscCheck(inoffset[i] + indegree[i] == inoffset[i+1],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
      }
    }
    PetscCall(PetscMalloc1(sf->nleaves,&remote));
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi));
    sf->multi->multi = sf->multi;
    PetscCall(PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER));
    if (sf->rankorder) { /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      PetscCall(PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset));
      /* Deliver each leaf's owning rank to its slot at the root */
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      PetscCall(PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE));
      PetscCall(PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE));
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        PetscCall(PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset));
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell each leaf its new (rank-sorted) slot index and rebuild the graph with it */
      PetscCall(PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE));
      PetscCall(PetscMalloc1(sf->nleaves,&newremote));
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      PetscCall(PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER));
      PetscCall(PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset));
    }
    PetscCall(PetscFree3(inoffset,outones,outoffset));
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}

/*@C
  PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices

  Collective

  Input Parameters:
+ sf - original star forest
. nselected  - number of selected roots on this process
- selected   - indices of the selected roots on this process

  Output Parameter:
. esf - new star forest

  Level: advanced

  Note:
  To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
  be done by calling PetscSFGetGraph().

.seealso: `PetscSFSetGraph()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
{
  PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
  const PetscInt    *ilocal;
  signed char       *rootdata,*leafdata,*leafmem;
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  MPI_Comm          comm;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (nselected) PetscValidIntPointer(selected,3);
  PetscValidPointer(esf,4);

  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0));
  PetscCall(PetscObjectGetComm((PetscObject)sf,&comm));
  PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote));

  if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
    PetscBool dups;
PetscCall(PetscCheckDupsInt(nselected,selected,&dups)); 1274 PetscCheck(!dups,comm,PETSC_ERR_ARG_WRONG,"selected[] has dups"); 1275 for (i=0; i<nselected; i++) 1276 PetscCheck(selected[i] >= 0 && selected[i] < nroots,comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")",selected[i],nroots); 1277 } 1278 1279 if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf,CreateEmbeddedRootSF ,nselected,selected,esf); 1280 else { 1281 /* A generic version of creating embedded sf */ 1282 PetscCall(PetscSFGetLeafRange(sf,&minleaf,&maxleaf)); 1283 maxlocal = maxleaf - minleaf + 1; 1284 PetscCall(PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem)); 1285 leafdata = leafmem - minleaf; 1286 /* Tag selected roots and bcast to leaves */ 1287 for (i=0; i<nselected; i++) rootdata[selected[i]] = 1; 1288 PetscCall(PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE)); 1289 PetscCall(PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE)); 1290 1291 /* Build esf with leaves that are still connected */ 1292 esf_nleaves = 0; 1293 for (i=0; i<nleaves; i++) { 1294 j = ilocal ? ilocal[i] : i; 1295 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1296 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1297 */ 1298 esf_nleaves += (leafdata[j] ? 1 : 0); 1299 } 1300 PetscCall(PetscMalloc1(esf_nleaves,&new_ilocal)); 1301 PetscCall(PetscMalloc1(esf_nleaves,&new_iremote)); 1302 for (i=n=0; i<nleaves; i++) { 1303 j = ilocal ? 
ilocal[i] : i; 1304 if (leafdata[j]) { 1305 new_ilocal[n] = j; 1306 new_iremote[n].rank = iremote[i].rank; 1307 new_iremote[n].index = iremote[i].index; 1308 ++n; 1309 } 1310 } 1311 PetscCall(PetscSFCreate(comm,esf)); 1312 PetscCall(PetscSFSetFromOptions(*esf)); 1313 PetscCall(PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER)); 1314 PetscCall(PetscFree2(rootdata,leafmem)); 1315 } 1316 PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0)); 1317 PetscFunctionReturn(0); 1318 } 1319 1320 /*@C 1321 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1322 1323 Collective 1324 1325 Input Parameters: 1326 + sf - original star forest 1327 . nselected - number of selected leaves on this process 1328 - selected - indices of the selected leaves on this process 1329 1330 Output Parameter: 1331 . newsf - new star forest 1332 1333 Level: advanced 1334 1335 .seealso: `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 1336 @*/ 1337 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1338 { 1339 const PetscSFNode *iremote; 1340 PetscSFNode *new_iremote; 1341 const PetscInt *ilocal; 1342 PetscInt i,nroots,*leaves,*new_ilocal; 1343 MPI_Comm comm; 1344 1345 PetscFunctionBegin; 1346 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1347 PetscSFCheckGraphSet(sf,1); 1348 if (nselected) PetscValidIntPointer(selected,3); 1349 PetscValidPointer(newsf,4); 1350 1351 /* Uniq selected[] and put results in leaves[] */ 1352 PetscCall(PetscObjectGetComm((PetscObject)sf,&comm)); 1353 PetscCall(PetscMalloc1(nselected,&leaves)); 1354 PetscCall(PetscArraycpy(leaves,selected,nselected)); 1355 PetscCall(PetscSortedRemoveDupsInt(&nselected,leaves)); 1356 PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves),comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not 
in [0,%" PetscInt_FMT ")",leaves[0],leaves[nselected-1],sf->nleaves); 1357 1358 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1359 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf,CreateEmbeddedLeafSF ,nselected,leaves,newsf); 1360 else { 1361 PetscCall(PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote)); 1362 PetscCall(PetscMalloc1(nselected,&new_ilocal)); 1363 PetscCall(PetscMalloc1(nselected,&new_iremote)); 1364 for (i=0; i<nselected; ++i) { 1365 const PetscInt l = leaves[i]; 1366 new_ilocal[i] = ilocal ? ilocal[l] : l; 1367 new_iremote[i].rank = iremote[l].rank; 1368 new_iremote[i].index = iremote[l].index; 1369 } 1370 PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf)); 1371 PetscCall(PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER)); 1372 } 1373 PetscCall(PetscFree(leaves)); 1374 PetscFunctionReturn(0); 1375 } 1376 1377 /*@C 1378 PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd() 1379 1380 Collective on PetscSF 1381 1382 Input Parameters: 1383 + sf - star forest on which to communicate 1384 . unit - data type associated with each node 1385 . rootdata - buffer to broadcast 1386 - op - operation to use for reduction 1387 1388 Output Parameter: 1389 . leafdata - buffer to be reduced with values from each leaf's respective root 1390 1391 Level: intermediate 1392 1393 Notes: 1394 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1395 are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1396 use PetscSFBcastWithMemTypeBegin() instead. 
1397 .seealso: `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()` 1398 @*/ 1399 PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1400 { 1401 PetscMemType rootmtype,leafmtype; 1402 1403 PetscFunctionBegin; 1404 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1405 PetscCall(PetscSFSetUp(sf)); 1406 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0)); 1407 PetscCall(PetscGetMemType(rootdata,&rootmtype)); 1408 PetscCall(PetscGetMemType(leafdata,&leafmtype)); 1409 PetscUseTypeMethod(sf,BcastBegin ,unit,rootmtype,rootdata,leafmtype,leafdata,op); 1410 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0)); 1411 PetscFunctionReturn(0); 1412 } 1413 1414 /*@C 1415 PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd() 1416 1417 Collective on PetscSF 1418 1419 Input Parameters: 1420 + sf - star forest on which to communicate 1421 . unit - data type associated with each node 1422 . rootmtype - memory type of rootdata 1423 . rootdata - buffer to broadcast 1424 . leafmtype - memory type of leafdata 1425 - op - operation to use for reduction 1426 1427 Output Parameter: 1428 . 
leafdata - buffer to be reduced with values from each leaf's respective root 1429 1430 Level: intermediate 1431 1432 .seealso: `PetscSFBcastEnd()`, `PetscSFBcastBegin()` 1433 @*/ 1434 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 1435 { 1436 PetscFunctionBegin; 1437 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1438 PetscCall(PetscSFSetUp(sf)); 1439 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0)); 1440 PetscUseTypeMethod(sf,BcastBegin ,unit,rootmtype,rootdata,leafmtype,leafdata,op); 1441 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0)); 1442 PetscFunctionReturn(0); 1443 } 1444 1445 /*@C 1446 PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin() 1447 1448 Collective 1449 1450 Input Parameters: 1451 + sf - star forest 1452 . unit - data type 1453 . rootdata - buffer to broadcast 1454 - op - operation to use for reduction 1455 1456 Output Parameter: 1457 . leafdata - buffer to be reduced with values from each leaf's respective root 1458 1459 Level: intermediate 1460 1461 .seealso: `PetscSFSetGraph()`, `PetscSFReduceEnd()` 1462 @*/ 1463 PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1464 { 1465 PetscFunctionBegin; 1466 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1467 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0)); 1468 PetscUseTypeMethod(sf,BcastEnd ,unit,rootdata,leafdata,op); 1469 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0)); 1470 PetscFunctionReturn(0); 1471 } 1472 1473 /*@C 1474 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 1475 1476 Collective 1477 1478 Input Parameters: 1479 + sf - star forest 1480 . unit - data type 1481 . 
leafdata - values to reduce 1482 - op - reduction operation 1483 1484 Output Parameter: 1485 . rootdata - result of reduction of values from all leaves of each root 1486 1487 Level: intermediate 1488 1489 Notes: 1490 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1491 are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1492 use PetscSFReduceWithMemTypeBegin() instead. 1493 1494 .seealso: `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()` 1495 @*/ 1496 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1497 { 1498 PetscMemType rootmtype,leafmtype; 1499 1500 PetscFunctionBegin; 1501 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1502 PetscCall(PetscSFSetUp(sf)); 1503 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0)); 1504 PetscCall(PetscGetMemType(rootdata,&rootmtype)); 1505 PetscCall(PetscGetMemType(leafdata,&leafmtype)); 1506 PetscCall((sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op)); 1507 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0)); 1508 PetscFunctionReturn(0); 1509 } 1510 1511 /*@C 1512 PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd() 1513 1514 Collective 1515 1516 Input Parameters: 1517 + sf - star forest 1518 . unit - data type 1519 . leafmtype - memory type of leafdata 1520 . leafdata - values to reduce 1521 . rootmtype - memory type of rootdata 1522 - op - reduction operation 1523 1524 Output Parameter: 1525 . 
rootdata - result of reduction of values from all leaves of each root 1526 1527 Level: intermediate 1528 1529 .seealso: `PetscSFBcastBegin()`, `PetscSFReduceBegin()` 1530 @*/ 1531 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 1532 { 1533 PetscFunctionBegin; 1534 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1535 PetscCall(PetscSFSetUp(sf)); 1536 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0)); 1537 PetscCall((sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op)); 1538 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0)); 1539 PetscFunctionReturn(0); 1540 } 1541 1542 /*@C 1543 PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin() 1544 1545 Collective 1546 1547 Input Parameters: 1548 + sf - star forest 1549 . unit - data type 1550 . leafdata - values to reduce 1551 - op - reduction operation 1552 1553 Output Parameter: 1554 . rootdata - result of reduction of values from all leaves of each root 1555 1556 Level: intermediate 1557 1558 .seealso: `PetscSFSetGraph()`, `PetscSFBcastEnd()` 1559 @*/ 1560 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1561 { 1562 PetscFunctionBegin; 1563 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1564 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0)); 1565 PetscUseTypeMethod(sf,ReduceEnd ,unit,leafdata,rootdata,op); 1566 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0)); 1567 PetscFunctionReturn(0); 1568 } 1569 1570 /*@C 1571 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd() 1572 1573 Collective 1574 1575 Input Parameters: 1576 + sf - star forest 1577 . 
unit - data type 1578 . leafdata - leaf values to use in reduction 1579 - op - operation to use for reduction 1580 1581 Output Parameters: 1582 + rootdata - root values to be updated, input state is seen by first process to perform an update 1583 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1584 1585 Level: advanced 1586 1587 Note: 1588 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1589 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1590 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1591 integers. 1592 1593 .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()` 1594 @*/ 1595 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1596 { 1597 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1598 1599 PetscFunctionBegin; 1600 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1601 PetscCall(PetscSFSetUp(sf)); 1602 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0)); 1603 PetscCall(PetscGetMemType(rootdata,&rootmtype)); 1604 PetscCall(PetscGetMemType(leafdata,&leafmtype)); 1605 PetscCall(PetscGetMemType(leafupdate,&leafupdatemtype)); 1606 PetscCheck(leafmtype == leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1607 PetscUseTypeMethod(sf,FetchAndOpBegin ,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op); 1608 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0)); 1609 PetscFunctionReturn(0); 1610 } 1611 1612 /*@C 1613 PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with 
PetscSFFetchAndOpEnd() 1614 1615 Collective 1616 1617 Input Parameters: 1618 + sf - star forest 1619 . unit - data type 1620 . rootmtype - memory type of rootdata 1621 . leafmtype - memory type of leafdata 1622 . leafdata - leaf values to use in reduction 1623 . leafupdatemtype - memory type of leafupdate 1624 - op - operation to use for reduction 1625 1626 Output Parameters: 1627 + rootdata - root values to be updated, input state is seen by first process to perform an update 1628 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1629 1630 Level: advanced 1631 1632 Note: See PetscSFFetchAndOpBegin() for more details. 1633 1634 .seealso: `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()` 1635 @*/ 1636 PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscMemType leafupdatemtype,void *leafupdate,MPI_Op op) 1637 { 1638 PetscFunctionBegin; 1639 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1640 PetscCall(PetscSFSetUp(sf)); 1641 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0)); 1642 PetscCheck(leafmtype == leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1643 PetscUseTypeMethod(sf,FetchAndOpBegin ,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op); 1644 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0)); 1645 PetscFunctionReturn(0); 1646 } 1647 1648 /*@C 1649 PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value 1650 1651 Collective 1652 1653 Input Parameters: 1654 + sf - star forest 1655 . unit - data type 1656 . 
leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0));
  /* Completes the exchange started by the matching FetchAndOpBegin; memory types were resolved there */
  PetscUseTypeMethod(sf,FetchAndOpEnd ,unit,rootdata,leafdata,leafupdate,op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: `PetscSFGatherBegin()`
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    PetscCheck(!sf->degree,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf-sf->minleaf+1;
    PetscCall(PetscMalloc1(nroots,&sf->degree));
    PetscCall(PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    /* Reduce a vector of all ones from the leaves; each root thus counts its incoming leaves */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* degreetmp covers [minleaf,maxleaf]; shift the pointer so the SF's leaf indices address it directly */
    PetscCall(PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM));
  }
  /* Result is only valid after PetscSFComputeDegreeEnd() */
  *degree = NULL;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: `PetscSFComputeDegreeBegin()`, `PetscSFGatherBegin()`
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscCheck(sf->degreetmp,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    PetscCall(PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM));
    PetscCall(PetscFree(sf->degreetmp));
    /* Cache the result so subsequent Begin/End pairs return immediately */
    sf->degreeknown = PETSC_TRUE;
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
   Each multi-root is assigned index of the corresponding original root.

   Collective

   Input Parameters:
+  sf - star forest
-  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

   Output Parameters:
+  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
-  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

   Level: developer

   Notes:
   The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.

.seealso: `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
@*/
PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
{
  PetscSF  msf;
  PetscInt i, j, k, nroots, nmroots;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  if (nroots) PetscValidIntPointer(degree,2);
  if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
  PetscValidPointer(multiRootsOrigNumbering,4);
  PetscCall(PetscSFGetMultiSF(sf,&msf));
  PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
  PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
  /* Each original root i with degree[i] > 0 expands into degree[i] consecutive multi-roots, all mapped back to i */
  for (i=0,j=0,k=0; i<nroots; i++) {
    if (!degree[i]) continue;
    for (j=0; j<degree[i]; j++,k++) {
      (*multiRootsOrigNumbering)[k] = i;
    }
  }
  /* The expansion must account for every multi-root exactly once */
  PetscCheck(k == nmroots,PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
  if (nMultiRoots) *nMultiRoots = nmroots;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Parameter:
.
multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1807 1808 Level: intermediate 1809 1810 .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()` 1811 @*/ 1812 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1813 { 1814 PetscSF multi = NULL; 1815 1816 PetscFunctionBegin; 1817 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1818 PetscCall(PetscSFSetUp(sf)); 1819 PetscCall(PetscSFGetMultiSF(sf,&multi)); 1820 PetscCall(PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE)); 1821 PetscFunctionReturn(0); 1822 } 1823 1824 /*@C 1825 PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin() 1826 1827 Collective 1828 1829 Input Parameters: 1830 + sf - star forest 1831 . unit - data type 1832 - leafdata - leaf data to gather to roots 1833 1834 Output Parameter: 1835 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1836 1837 Level: intermediate 1838 1839 .seealso: `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()` 1840 @*/ 1841 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1842 { 1843 PetscSF multi = NULL; 1844 1845 PetscFunctionBegin; 1846 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1847 PetscCall(PetscSFGetMultiSF(sf,&multi)); 1848 PetscCall(PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE)); 1849 PetscFunctionReturn(0); 1850 } 1851 1852 /*@C 1853 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd() 1854 1855 Collective 1856 1857 Input Parameters: 1858 + sf - star forest 1859 . unit - data type 1860 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1861 1862 Output Parameter: 1863 . 
leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscSFSetUp(sf));
  /* A scatter is a broadcast over the multi-SF: each leaf receives its personal entry of the multi-root buffer */
  PetscCall(PetscSFGetMultiSF(sf,&multi));
  PetscCall(PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Parameter:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Complete the broadcast over the multi-SF started in PetscSFScatterBegin() */
  PetscCall(PetscSFGetMultiSF(sf,&multi));
  PetscCall(PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE));
  PetscFunctionReturn(0);
}

/* Verify (debug builds only) that no leaf index appears more than once in the SF's graph */
static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
{
  PetscInt        i, n, nleaves;
  const PetscInt *ilocal = NULL;
  PetscHSetI      seen;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) {
    PetscCall(PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL));
    PetscCall(PetscHSetICreate(&seen));
    for (i = 0; i < nleaves; i++) {
      const PetscInt leaf = ilocal ? ilocal[i] : i; /* ilocal == NULL means contiguous leaves 0..nleaves-1 */
      PetscCall(PetscHSetIAdd(seen,leaf));
    }
    /* Any repeated leaf makes the set smaller than the leaf count */
    PetscCall(PetscHSetIGetSize(seen,&n));
    PetscCheck(n == nleaves,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
    PetscCall(PetscHSetIDestroy(&seen));
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

   Input Parameters:
+  sfA - The first PetscSF
-  sfB - The second PetscSF

   Output Parameters:
.  sfBA - The composite SF

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e. the same leaf is not connected with different roots.

   sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
   Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt           i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool          denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA));
  PetscCall(PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB));
  if (localPointsA) {
    /* Reorder sfA's remote points into sfB's root numbering; rank/index -1 marks sfB roots not reached by sfA */
    PetscCall(PetscMalloc1(numRootsB,&reorderedRemotePointsA));
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* leaves of sfA outside sfB's root space cannot connect */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  /* Push sfA's root locations down through sfB to its leaves */
  PetscCall(PetscSFGetLeafRange(sfB,&minleaf,&maxleaf));
  PetscCall(PetscMalloc1(maxleaf-minleaf+1,&leafdataB));
  PetscCall(PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE));
  PetscCall(PetscFree(reorderedRemotePointsA));

  /* denseB: sfB has contiguous leaves and every one of them received a connected root, so leafdataB can be reused as-is */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership of leafdataB transfers to the new SF below */
  } else {
    /* Compact: keep only leaves connected to some root of sfA */
    PetscCall(PetscMalloc1(numLeavesBA,&localPointsBA));
    PetscCall(PetscMalloc1(numLeavesBA,&remotePointsBA));
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    PetscCall(PetscFree(leafdataB));
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  PetscCall(PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER));
  PetscFunctionReturn(0);
}

/*@
   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

   Input Parameters:
+  sfA - The first PetscSF
-  sfB - The second PetscSF

   Output Parameters:
.  sfBA - The composite SF.

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
   a Reduce on sfB, on connected roots.

.seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt           i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op             op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  PetscCall(PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin));
  if (iswin) op = MPI_REPLACE;
#endif

  /* Lay out sfA's remote points over sfB's leaf range; rank/index -1 marks positions sfA does not reach */
  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA));
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  PetscCall(PetscMalloc1(numRootsB,&localPointsBA));
  PetscCall(PetscMalloc1(numRootsB,&remotePointsBA));
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Pull sfA's root locations from sfB's leaves up to sfB's roots, i.e. through the inverse of sfB */
  PetscCall(PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op));
  PetscCall(PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op));
  PetscCall(PetscFree(reorderedRemotePointsA));
  /* Compact in place: keep only sfB roots that received a connected root of sfA */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  PetscCall(PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER));
  PetscFunctionReturn(0);
}

/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
. out - The local PetscSF
*/
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm           comm;
  PetscMPIInt        myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode       *liremote;
  PetscSF            lsf;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the implementation provide an optimized version when it has one */
  if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf,CreateLocalSF ,out);
  else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    PetscCall(PetscObjectGetComm((PetscObject)sf,&comm));
    PetscCallMPI(MPI_Comm_rank(comm,&myrank));

    /* Find out local edges and build a local SF */
    PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote));
    /* First pass: count leaves whose root lives on this rank */
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    PetscCall(PetscMalloc1(lnleaves,&lilocal));
    PetscCall(PetscMalloc1(lnleaves,&liremote));

    /* Second pass: copy the intra-process edges */
    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    PetscCall(PetscSFCreate(PETSC_COMM_SELF,&lsf));
    PetscCall(PetscSFSetFromOptions(lsf));
    PetscCall(PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(lsf));
    *out = lsf;
  }
  PetscFunctionReturn(0);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscMemType rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0));
  PetscCall(PetscGetMemType(rootdata,&rootmtype));
  PetscCall(PetscGetMemType(leafdata,&leafmtype));
  PetscUseTypeMethod(sf,BcastToZero ,unit,rootmtype,rootdata,leafmtype,leafdata);
  PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0));
  PetscFunctionReturn(0);
}

/*@
   PetscSFConcatenate - concatenate multiple SFs into one

   Input Parameters:
+  comm - the communicator
.  nsfs - the number of input PetscSF
.  sfs - the array of input PetscSF
.  shareRoots - the flag whether roots of input PetscSFs are taken as shared (PETSC_TRUE), or separate and concatenated (PETSC_FALSE)
-  leafOffsets - the array of local leaf offsets, one for each input PetscSF, or NULL for contiguous storage

   Output Parameters:
.  newsf - The resulting PetscSF

   Level: developer

   Notes:
   The communicator of all SFs in sfs must be comm.

   The offsets in leafOffsets are added to the original leaf indices.

   If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well.
   In this case, NULL is also passed as ilocal to the resulting SF.

   If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs.
   In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs).

.seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscBool shareRoots, PetscInt leafOffsets[], PetscSF *newsf)
{
  PetscInt     i, s, nLeaves, nRoots;
  PetscInt    *leafArrayOffsets;
  PetscInt    *ilocal_new;
  PetscSFNode *iremote_new;
  PetscInt    *rootOffsets;
  PetscBool    all_ilocal_null = PETSC_FALSE;
  PetscMPIInt  rank;

  PetscFunctionBegin;
  {
    PetscSF dummy; /* just to have a PetscObject on comm for input validation */

    PetscCall(PetscSFCreate(comm,&dummy));
    PetscValidLogicalCollectiveInt(dummy,nsfs,2);
    PetscValidPointer(sfs,3);
    for (i=0; i<nsfs; i++) {
      PetscValidHeaderSpecific(sfs[i],PETSCSF_CLASSID,3);
      PetscCheckSameComm(dummy,1,sfs[i],3);
    }
    PetscValidLogicalCollectiveBool(dummy,shareRoots,4);
    if (leafOffsets) PetscValidIntPointer(leafOffsets,5);
    PetscValidPointer(newsf,6);
    PetscCall(PetscSFDestroy(&dummy));
  }
  if (!nsfs) {
    /* Concatenating nothing yields an empty SF */
    PetscCall(PetscSFCreate(comm, newsf));
    PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscFunctionReturn(0);
  }
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* Root offsets: all zero when roots are shared, otherwise a running sum of each SF's root count */
  PetscCall(PetscCalloc1(nsfs+1, &rootOffsets));
  if (shareRoots) {
    PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
    if (PetscDefined(USE_DEBUG)) {
      for (s=1; s<nsfs; s++) {
        PetscInt nr;

        PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
        PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "shareRoots = PETSC_TRUE but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", s, nr, nRoots);
      }
    }
  } else {
    for (s=0; s<nsfs; s++) {
      PetscInt nr;

      PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
      rootOffsets[s+1] = rootOffsets[s] + nr;
    }
    nRoots = rootOffsets[nsfs];
  }

  /* Calculate leaf array offsets and automatic root offsets */
  PetscCall(PetscMalloc1(nsfs+1,&leafArrayOffsets));
  leafArrayOffsets[0] = 0;
  for (s=0; s<nsfs; s++) {
    PetscInt nl;

    PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
    leafArrayOffsets[s+1] = leafArrayOffsets[s] + nl;
  }
  nLeaves = leafArrayOffsets[nsfs];

  /* leafOffsets == NULL is only legal when every input SF has contiguous (NULL) ilocal */
  if (!leafOffsets) {
    all_ilocal_null = PETSC_TRUE;
    for (s=0; s<nsfs; s++) {
      const PetscInt *ilocal;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
      if (ilocal) {
        all_ilocal_null = PETSC_FALSE;
        break;
      }
    }
    PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
  }

  /* Renumber and concatenate local leaves */
  ilocal_new = NULL;
  if (!all_ilocal_null) {
    /* Note leafOffsets is non-NULL here: all_ilocal_null == PETSC_FALSE is only reachable when it was provided */
    PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
    for (i = 0; i<nLeaves; i++) ilocal_new[i] = -1;
    for (s = 0; s<nsfs; s++) {
      const PetscInt *ilocal;
      PetscInt       *ilocal_l = &ilocal_new[leafArrayOffsets[s]];
      PetscInt        i, nleaves_l;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
      for (i=0; i<nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
    }
  }

  /* Renumber and concatenate remote roots */
  PetscCall(PetscMalloc1(nLeaves, &iremote_new));
  for (i = 0; i < nLeaves; i++) {
    iremote_new[i].rank = -1;
    iremote_new[i].index = -1;
  }
  for (s = 0; s<nsfs; s++) {
    PetscInt           i, nl, nr;
    PetscSF            tmp_sf;
    const PetscSFNode *iremote;
    PetscSFNode       *tmp_rootdata;
    PetscSFNode       *tmp_leafdata = &iremote_new[leafArrayOffsets[s]];

    PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
    PetscCall(PetscSFCreate(comm, &tmp_sf));
    /* create helper SF with contiguous leaves */
    PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode*) iremote, PETSC_COPY_VALUES));
    PetscCall(PetscSFSetUp(tmp_sf));
    PetscCall(PetscMalloc1(nr, &tmp_rootdata));
    /* Broadcast each root's offset index and owning rank to its leaves to form the new iremote entries */
    for (i = 0; i < nr; i++) {
      tmp_rootdata[i].index = i + rootOffsets[s];
      tmp_rootdata[i].rank = (PetscInt) rank;
    }
    PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd( tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
    PetscCall(PetscSFDestroy(&tmp_sf));
    PetscCall(PetscFree(tmp_rootdata));
  }

  /* Build the new SF */
  PetscCall(PetscSFCreate(comm, newsf));
  PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetUp(*newsf));
  PetscCall(PetscFree(rootOffsets));
  PetscCall(PetscFree(leafArrayOffsets));
  PetscFunctionReturn(0);
}