1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/hashseti.h> 3 #include <petsc/private/viewerimpl.h> 4 #include <petscctable.h> 5 6 #if defined(PETSC_HAVE_CUDA) 7 #include <cuda_runtime.h> 8 #endif 9 10 #if defined(PETSC_HAVE_HIP) 11 #include <hip/hip_runtime.h> 12 #endif 13 14 #if defined(PETSC_CLANG_STATIC_ANALYZER) 15 void PetscSFCheckGraphSet(PetscSF,int); 16 #else 17 #if defined(PETSC_USE_DEBUG) 18 # define PetscSFCheckGraphSet(sf,arg) do { \ 19 if (PetscUnlikely(!(sf)->graphset)) \ 20 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \ 21 } while (0) 22 #else 23 # define PetscSFCheckGraphSet(sf,arg) do {} while (0) 24 #endif 25 #endif 26 27 const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL}; 28 29 /*@ 30 PetscSFCreate - create a star forest communication context 31 32 Collective 33 34 Input Parameter: 35 . comm - communicator on which the star forest will operate 36 37 Output Parameter: 38 . sf - new star forest context 39 40 Options Database Keys: 41 + -sf_type basic -Use MPI persistent Isend/Irecv for communication (Default) 42 . -sf_type window -Use MPI-3 one-sided window for communication 43 - -sf_type neighbor -Use MPI-3 neighborhood collectives for communication 44 45 Level: intermediate 46 47 Notes: 48 When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv, 49 MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special 50 SFs are optimized and they have better performance than general SFs. 51 52 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy() 53 @*/ 54 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf) 55 { 56 PetscSF b; 57 58 PetscFunctionBegin; 59 PetscValidPointer(sf,2); 60 PetscCall(PetscSFInitializePackage()); 61 62 PetscCall(PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView)); 63 64 b->nroots = -1; 65 b->nleaves = -1; 66 b->minleaf = PETSC_MAX_INT; 67 b->maxleaf = PETSC_MIN_INT; 68 b->nranks = -1; 69 b->rankorder = PETSC_TRUE; 70 b->ingroup = MPI_GROUP_NULL; 71 b->outgroup = MPI_GROUP_NULL; 72 b->graphset = PETSC_FALSE; 73 #if defined(PETSC_HAVE_DEVICE) 74 b->use_gpu_aware_mpi = use_gpu_aware_mpi; 75 b->use_stream_aware_mpi = PETSC_FALSE; 76 b->unknown_input_stream= PETSC_FALSE; 77 #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 78 b->backend = PETSCSF_BACKEND_KOKKOS; 79 #elif defined(PETSC_HAVE_CUDA) 80 b->backend = PETSCSF_BACKEND_CUDA; 81 #elif defined(PETSC_HAVE_HIP) 82 b->backend = PETSCSF_BACKEND_HIP; 83 #endif 84 85 #if defined(PETSC_HAVE_NVSHMEM) 86 b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */ 87 b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */ 88 PetscCall(PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL)); 89 PetscCall(PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL)); 90 #endif 91 #endif 92 b->vscat.from_n = -1; 93 b->vscat.to_n = -1; 94 b->vscat.unit = MPIU_SCALAR; 95 *sf = b; 96 PetscFunctionReturn(0); 97 } 98 99 /*@ 100 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 101 102 Collective 103 104 Input Parameter: 105 . sf - star forest 106 107 Level: advanced 108 109 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy() 110 @*/ 111 PetscErrorCode PetscSFReset(PetscSF sf) 112 { 113 PetscFunctionBegin; 114 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 115 if (sf->ops->Reset) PetscCall((*sf->ops->Reset)(sf)); 116 sf->nroots = -1; 117 sf->nleaves = -1; 118 sf->minleaf = PETSC_MAX_INT; 119 sf->maxleaf = PETSC_MIN_INT; 120 sf->mine = NULL; 121 sf->remote = NULL; 122 sf->graphset = PETSC_FALSE; 123 PetscCall(PetscFree(sf->mine_alloc)); 124 PetscCall(PetscFree(sf->remote_alloc)); 125 sf->nranks = -1; 126 PetscCall(PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote)); 127 sf->degreeknown = PETSC_FALSE; 128 PetscCall(PetscFree(sf->degree)); 129 if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup)); 130 if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup)); 131 if (sf->multi) sf->multi->multi = NULL; 132 PetscCall(PetscSFDestroy(&sf->multi)); 133 PetscCall(PetscLayoutDestroy(&sf->map)); 134 135 #if defined(PETSC_HAVE_DEVICE) 136 for (PetscInt i=0; i<2; i++) PetscCall(PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i])); 137 #endif 138 139 sf->setupcalled = PETSC_FALSE; 140 PetscFunctionReturn(0); 141 } 142 143 /*@C 144 PetscSFSetType - Set the PetscSF communication implementation 145 146 Collective on PetscSF 147 148 Input Parameters: 149 + sf - the PetscSF context 150 - type - a known method 151 152 Options Database Key: 153 . -sf_type <type> - Sets the method; use -help for a list 154 of available methods (for instance, window, basic, neighbor) 155 156 Notes: 157 See "include/petscsf.h" for available methods (for instance) 158 + PETSCSFWINDOW - MPI-2/3 one-sided 159 - PETSCSFBASIC - basic implementation using MPI-1 two-sided 160 161 Level: intermediate 162 163 .seealso: PetscSFType, PetscSFCreate() 164 @*/ 165 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type) 166 { 167 PetscBool match; 168 PetscErrorCode (*r)(PetscSF); 169 170 PetscFunctionBegin; 171 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 172 PetscValidCharPointer(type,2); 173 174 PetscCall(PetscObjectTypeCompare((PetscObject)sf,type,&match)); 175 if (match) PetscFunctionReturn(0); 176 177 PetscCall(PetscFunctionListFind(PetscSFList,type,&r)); 178 PetscCheck(r,PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type); 179 /* Destroy the previous PetscSF implementation context */ 180 if (sf->ops->Destroy) PetscCall((*(sf)->ops->Destroy)(sf)); 181 PetscCall(PetscMemzero(sf->ops,sizeof(*sf->ops))); 182 PetscCall(PetscObjectChangeTypeName((PetscObject)sf,type)); 183 PetscCall((*r)(sf)); 184 PetscFunctionReturn(0); 185 } 186 187 /*@C 188 PetscSFGetType - Get the PetscSF communication implementation 189 190 Not Collective 191 192 Input Parameter: 193 . sf - the PetscSF context 194 195 Output Parameter: 196 . type - the PetscSF type name 197 198 Level: intermediate 199 200 .seealso: PetscSFSetType(), PetscSFCreate() 201 @*/ 202 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 203 { 204 PetscFunctionBegin; 205 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1); 206 PetscValidPointer(type,2); 207 *type = ((PetscObject)sf)->type_name; 208 PetscFunctionReturn(0); 209 } 210 211 /*@C 212 PetscSFDestroy - destroy star forest 213 214 Collective 215 216 Input Parameter: 217 . sf - address of star forest 218 219 Level: intermediate 220 221 .seealso: PetscSFCreate(), PetscSFReset() 222 @*/ 223 PetscErrorCode PetscSFDestroy(PetscSF *sf) 224 { 225 PetscFunctionBegin; 226 if (!*sf) PetscFunctionReturn(0); 227 PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1); 228 if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);} 229 PetscCall(PetscSFReset(*sf)); 230 if ((*sf)->ops->Destroy) PetscCall((*(*sf)->ops->Destroy)(*sf)); 231 PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf)); 232 if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit)); 233 PetscCall(PetscHeaderDestroy(sf)); 234 PetscFunctionReturn(0); 235 } 236 237 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 238 { 239 PetscInt i, nleaves; 240 PetscMPIInt size; 241 const PetscInt *ilocal; 242 const PetscSFNode *iremote; 243 244 PetscFunctionBegin; 245 if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0); 246 PetscCall(PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote)); 247 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size)); 248 for (i = 0; i < nleaves; i++) { 249 const PetscInt rank = iremote[i].rank; 250 const PetscInt remote = iremote[i].index; 251 const PetscInt leaf = ilocal ? ilocal[i] : i; 252 PetscCheck(rank >= 0 && rank < size,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)",rank,i,size); 253 PetscCheck(remote >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0",remote,i); 254 PetscCheck(leaf >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0",leaf,i); 255 } 256 PetscFunctionReturn(0); 257 } 258 259 /*@ 260 PetscSFSetUp - set up communication structures 261 262 Collective 263 264 Input Parameter: 265 . sf - star forest communication object 266 267 Level: beginner 268 269 .seealso: PetscSFSetFromOptions(), PetscSFSetType() 270 @*/ 271 PetscErrorCode PetscSFSetUp(PetscSF sf) 272 { 273 PetscFunctionBegin; 274 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 275 PetscSFCheckGraphSet(sf,1); 276 if (sf->setupcalled) PetscFunctionReturn(0); 277 PetscCall(PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0)); 278 PetscCall(PetscSFCheckGraphValid_Private(sf)); 279 if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf,PETSCSFBASIC)); /* Zero all sf->ops */ 280 if (sf->ops->SetUp) PetscCall((*sf->ops->SetUp)(sf)); 281 #if defined(PETSC_HAVE_CUDA) 282 if (sf->backend == PETSCSF_BACKEND_CUDA) { 283 sf->ops->Malloc = PetscSFMalloc_CUDA; 284 sf->ops->Free = PetscSFFree_CUDA; 285 } 286 #endif 287 #if defined(PETSC_HAVE_HIP) 288 if (sf->backend == PETSCSF_BACKEND_HIP) { 289 sf->ops->Malloc = PetscSFMalloc_HIP; 290 sf->ops->Free = PetscSFFree_HIP; 291 } 292 #endif 293 294 # 295 #if defined(PETSC_HAVE_KOKKOS) 296 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 297 sf->ops->Malloc = PetscSFMalloc_Kokkos; 298 sf->ops->Free = PetscSFFree_Kokkos; 299 } 300 #endif 301 PetscCall(PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0)); 302 sf->setupcalled = PETSC_TRUE; 303 PetscFunctionReturn(0); 304 } 305 306 /*@ 307 PetscSFSetFromOptions - set PetscSF options using the options database 308 309 Logically Collective 310 311 Input Parameter: 312 . sf - star forest 313 314 Options Database Keys: 315 + -sf_type - implementation type, see PetscSFSetType() 316 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 317 . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also 318 use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true). 319 If true, this option only works with -use_gpu_aware_mpi 1. 320 . -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false). 321 If true, this option only works with -use_gpu_aware_mpi 1. 322 323 - -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos. 324 On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices, 325 the only available is kokkos. 326 327 Level: intermediate 328 @*/ 329 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 330 { 331 PetscSFType deft; 332 char type[256]; 333 PetscBool flg; 334 335 PetscFunctionBegin; 336 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 337 PetscObjectOptionsBegin((PetscObject)sf); 338 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 339 PetscCall(PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg)); 340 PetscCall(PetscSFSetType(sf,flg ? type : deft)); 341 PetscCall(PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL)); 342 #if defined(PETSC_HAVE_DEVICE) 343 { 344 char backendstr[32] = {0}; 345 PetscBool isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set; 346 /* Change the defaults set in PetscSFCreate() with command line options */ 347 PetscCall(PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL)); 348 PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL)); 349 PetscCall(PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set)); 350 PetscCall(PetscStrcasecmp("cuda",backendstr,&isCuda)); 351 PetscCall(PetscStrcasecmp("kokkos",backendstr,&isKokkos)); 352 PetscCall(PetscStrcasecmp("hip",backendstr,&isHip)); 353 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 354 if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 355 else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 356 else if (isHip) sf->backend = PETSCSF_BACKEND_HIP; 357 else PetscCheck(!set,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr); 358 #elif defined(PETSC_HAVE_KOKKOS) 359 PetscCheck(!set || isKokkos,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr); 360 #endif 361 } 362 #endif 363 if (sf->ops->SetFromOptions) PetscCall((*sf->ops->SetFromOptions)(PetscOptionsObject,sf)); 364 PetscOptionsEnd(); 365 PetscFunctionReturn(0); 366 } 367 368 /*@ 369 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 370 371 Logically Collective 372 373 Input Parameters: 374 + sf - star forest 375 - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic) 376 377 Level: advanced 378 379 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin() 380 @*/ 381 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg) 382 { 383 PetscFunctionBegin; 384 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 385 PetscValidLogicalCollectiveBool(sf,flg,2); 386 PetscCheck(!sf->multi,PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 387 sf->rankorder = flg; 388 PetscFunctionReturn(0); 389 } 390 391 /*@C 392 PetscSFSetGraph - Set a parallel star forest 393 394 Collective 395 396 Input Parameters: 397 + sf - star forest 398 . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 399 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 400 . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced 401 during setup in debug mode) 402 . localmode - copy mode for ilocal 403 . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 404 during setup in debug mode) 405 - remotemode - copy mode for iremote 406 407 Level: intermediate 408 409 Notes: 410 Leaf indices in ilocal must be unique, otherwise an error occurs. 411 412 Input arrays ilocal and iremote follow the PetscCopyMode semantics. 413 In particular, if localmode/remotemode is PETSC_OWN_POINTER or PETSC_USE_POINTER, 414 PETSc might modify the respective array; 415 if PETSC_USE_POINTER, the user must delete the array after PetscSFDestroy(). 416 Only if PETSC_COPY_VALUES is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed). 417 418 Fortran Notes: 419 In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode. 420 421 Developer Notes: 422 We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf. 423 This also allows to compare leaf sets of two SFs easily. 424 425 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph() 426 @*/ 427 PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,PetscInt *ilocal,PetscCopyMode localmode,PetscSFNode *iremote,PetscCopyMode remotemode) 428 { 429 PetscBool unique, contiguous; 430 431 PetscFunctionBegin; 432 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 433 if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4); 434 if (nleaves > 0) PetscValidPointer(iremote,6); 435 PetscCheck(nroots >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %" PetscInt_FMT ", cannot be negative",nroots); 436 PetscCheck(nleaves >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %" PetscInt_FMT ", cannot be negative",nleaves); 437 /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast 438 * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */ 439 PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong localmode %d",localmode); 440 PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong remotemode %d",remotemode); 441 442 if (sf->nroots >= 0) { /* Reset only if graph already set */ 443 PetscCall(PetscSFReset(sf)); 444 } 445 446 PetscCall(PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0)); 447 448 sf->nroots = nroots; 449 sf->nleaves = nleaves; 450 451 if (localmode == PETSC_COPY_VALUES && ilocal) { 452 PetscInt *tlocal = NULL; 453 454 PetscCall(PetscMalloc1(nleaves,&tlocal)); 455 PetscCall(PetscArraycpy(tlocal,ilocal,nleaves)); 456 ilocal = tlocal; 457 } 458 if (remotemode == PETSC_COPY_VALUES) { 459 PetscSFNode *tremote = NULL; 460 461 PetscCall(PetscMalloc1(nleaves,&tremote)); 462 PetscCall(PetscArraycpy(tremote,iremote,nleaves)); 463 iremote = tremote; 464 } 465 466 if (nleaves && ilocal) { 467 PetscSFNode work; 468 469 PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work)); 470 PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique)); 471 unique = PetscNot(unique); 472 PetscCheck(sf->allow_multi_leaves || unique,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Input ilocal has duplicate entries which is not allowed for this PetscSF"); 473 sf->minleaf = ilocal[0]; 474 sf->maxleaf = ilocal[nleaves-1]; 475 contiguous = (PetscBool) (unique && ilocal[0] == 0 && ilocal[nleaves-1] == nleaves-1); 476 } else { 477 sf->minleaf = 0; 478 sf->maxleaf = nleaves - 1; 479 unique = PETSC_TRUE; 480 contiguous = PETSC_TRUE; 481 } 482 483 if (contiguous) { 484 if (localmode == PETSC_USE_POINTER) { 485 ilocal = NULL; 486 } else { 487 PetscCall(PetscFree(ilocal)); 488 } 489 } 490 sf->mine = ilocal; 491 if (localmode == PETSC_USE_POINTER) { 492 sf->mine_alloc = NULL; 493 } else { 494 sf->mine_alloc = ilocal; 495 } 496 sf->remote = iremote; 497 if (remotemode == PETSC_USE_POINTER) { 498 sf->remote_alloc = NULL; 499 } else { 500 sf->remote_alloc = iremote; 501 } 502 PetscCall(PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0)); 503 sf->graphset = PETSC_TRUE; 504 PetscFunctionReturn(0); 505 } 506 507 /*@ 508 PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern 509 510 Collective 511 512 Input Parameters: 513 + sf - The PetscSF 514 . map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL) 515 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL 516 517 Notes: 518 It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map. 519 n and N are local and global sizes of x respectively. 520 521 With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to 522 sequential vectors y on all ranks. 523 524 With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a 525 sequential vector y on rank 0. 526 527 In above cases, entries of x are roots and entries of y are leaves. 528 529 With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine 530 creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 531 of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does 532 not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data 533 items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines. 534 535 In this case, roots and leaves are symmetric. 536 537 Level: intermediate 538 @*/ 539 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern) 540 { 541 MPI_Comm comm; 542 PetscInt n,N,res[2]; 543 PetscMPIInt rank,size; 544 PetscSFType type; 545 546 PetscFunctionBegin; 547 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 548 if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map,2); 549 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 550 PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL,comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %d",pattern); 551 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 552 PetscCallMPI(MPI_Comm_size(comm,&size)); 553 554 if (pattern == PETSCSF_PATTERN_ALLTOALL) { 555 type = PETSCSFALLTOALL; 556 PetscCall(PetscLayoutCreate(comm,&sf->map)); 557 PetscCall(PetscLayoutSetLocalSize(sf->map,size)); 558 PetscCall(PetscLayoutSetSize(sf->map,((PetscInt)size)*size)); 559 PetscCall(PetscLayoutSetUp(sf->map)); 560 } else { 561 PetscCall(PetscLayoutGetLocalSize(map,&n)); 562 PetscCall(PetscLayoutGetSize(map,&N)); 563 res[0] = n; 564 res[1] = -n; 565 /* Check if n are same over all ranks so that we can optimize it */ 566 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm)); 567 if (res[0] == -res[1]) { /* same n */ 568 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 569 } else { 570 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 571 } 572 PetscCall(PetscLayoutReference(map,&sf->map)); 573 } 574 PetscCall(PetscSFSetType(sf,type)); 575 576 sf->pattern = pattern; 577 sf->mine = NULL; /* Contiguous */ 578 579 /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 580 Also set other easy stuff. 581 */ 582 if (pattern == PETSCSF_PATTERN_ALLGATHER) { 583 sf->nleaves = N; 584 sf->nroots = n; 585 sf->nranks = size; 586 sf->minleaf = 0; 587 sf->maxleaf = N - 1; 588 } else if (pattern == PETSCSF_PATTERN_GATHER) { 589 sf->nleaves = rank ? 0 : N; 590 sf->nroots = n; 591 sf->nranks = rank ? 0 : size; 592 sf->minleaf = 0; 593 sf->maxleaf = rank ? -1 : N - 1; 594 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 595 sf->nleaves = size; 596 sf->nroots = size; 597 sf->nranks = size; 598 sf->minleaf = 0; 599 sf->maxleaf = size - 1; 600 } 601 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 602 sf->graphset = PETSC_TRUE; 603 PetscFunctionReturn(0); 604 } 605 606 /*@ 607 PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map 608 609 Collective 610 611 Input Parameter: 612 . sf - star forest to invert 613 614 Output Parameter: 615 . isf - inverse of sf 616 617 Level: advanced 618 619 Notes: 620 All roots must have degree 1. 621 622 The local space may be a permutation, but cannot be sparse. 623 624 .seealso: PetscSFSetGraph() 625 @*/ 626 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf) 627 { 628 PetscMPIInt rank; 629 PetscInt i,nroots,nleaves,maxlocal,count,*newilocal; 630 const PetscInt *ilocal; 631 PetscSFNode *roots,*leaves; 632 633 PetscFunctionBegin; 634 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 635 PetscSFCheckGraphSet(sf,1); 636 PetscValidPointer(isf,2); 637 638 PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL)); 639 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 640 641 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank)); 642 PetscCall(PetscMalloc2(nroots,&roots,maxlocal,&leaves)); 643 for (i=0; i<maxlocal; i++) { 644 leaves[i].rank = rank; 645 leaves[i].index = i; 646 } 647 for (i=0; i <nroots; i++) { 648 roots[i].rank = -1; 649 roots[i].index = -1; 650 } 651 PetscCall(PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE)); 652 PetscCall(PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE)); 653 654 /* Check whether our leaves are sparse */ 655 for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++; 656 if (count == nroots) newilocal = NULL; 657 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ 658 PetscCall(PetscMalloc1(count,&newilocal)); 659 for (i=0,count=0; i<nroots; i++) { 660 if (roots[i].rank >= 0) { 661 newilocal[count] = i; 662 roots[count].rank = roots[i].rank; 663 roots[count].index = roots[i].index; 664 count++; 665 } 666 } 667 } 668 669 PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf)); 670 PetscCall(PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES)); 671 PetscCall(PetscFree2(roots,leaves)); 672 PetscFunctionReturn(0); 673 } 674 675 /*@ 676 PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph 677 678 Collective 679 680 Input Parameters: 681 + sf - communication object to duplicate 682 - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption) 683 684 Output Parameter: 685 . newsf - new communication object 686 687 Level: beginner 688 689 .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph() 690 @*/ 691 PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf) 692 { 693 PetscSFType type; 694 MPI_Datatype dtype=MPIU_SCALAR; 695 696 PetscFunctionBegin; 697 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 698 PetscValidLogicalCollectiveEnum(sf,opt,2); 699 PetscValidPointer(newsf,3); 700 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf)); 701 PetscCall(PetscSFGetType(sf,&type)); 702 if (type) PetscCall(PetscSFSetType(*newsf,type)); 703 if (opt == PETSCSF_DUPLICATE_GRAPH) { 704 PetscSFCheckGraphSet(sf,1); 705 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 706 PetscInt nroots,nleaves; 707 const PetscInt *ilocal; 708 const PetscSFNode *iremote; 709 PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote)); 710 PetscCall(PetscSFSetGraph(*newsf,nroots,nleaves,(PetscInt*)ilocal,PETSC_COPY_VALUES,(PetscSFNode*)iremote,PETSC_COPY_VALUES)); 711 } else { 712 PetscCall(PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern)); 713 } 714 } 715 /* Since oldtype is committed, so is newtype, according to MPI */ 716 if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit,&dtype)); 717 (*newsf)->vscat.bs = sf->vscat.bs; 718 (*newsf)->vscat.unit = dtype; 719 (*newsf)->vscat.to_n = sf->vscat.to_n; 720 (*newsf)->vscat.from_n = sf->vscat.from_n; 721 /* Do not copy lsf. Build it on demand since it is rarely used */ 722 723 #if defined(PETSC_HAVE_DEVICE) 724 (*newsf)->backend = sf->backend; 725 (*newsf)->unknown_input_stream= sf->unknown_input_stream; 726 (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi; 727 (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi; 728 #endif 729 if (sf->ops->Duplicate) PetscCall((*sf->ops->Duplicate)(sf,opt,*newsf)); 730 /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */ 731 PetscFunctionReturn(0); 732 } 733 734 /*@C 735 PetscSFGetGraph - Get the graph specifying a parallel star forest 736 737 Not Collective 738 739 Input Parameter: 740 . sf - star forest 741 742 Output Parameters: 743 + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 744 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 745 . ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage) 746 - iremote - remote locations of root vertices for each leaf on the current process 747 748 Notes: 749 We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet 750 751 The returned ilocal/iremote might contain values in different order than the input ones in PetscSFSetGraph(), 752 see its manpage for details. 753 754 Fortran Notes: 755 The returned iremote array is a copy and must be deallocated after use. Consequently, if you 756 want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array. 757 758 To check for a NULL ilocal use 759 $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then 760 761 Level: intermediate 762 763 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph() 764 @*/ 765 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 766 { 767 PetscFunctionBegin; 768 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 769 if (sf->ops->GetGraph) { 770 PetscCall((sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote)); 771 } else { 772 if (nroots) *nroots = sf->nroots; 773 if (nleaves) *nleaves = sf->nleaves; 774 if (ilocal) *ilocal = sf->mine; 775 if (iremote) *iremote = sf->remote; 776 } 777 PetscFunctionReturn(0); 778 } 779 780 /*@ 781 PetscSFGetLeafRange - Get the active leaf ranges 782 783 Not Collective 784 785 Input Parameter: 786 . sf - star forest 787 788 Output Parameters: 789 + minleaf - minimum active leaf on this process. Return 0 if there are no leaves. 790 - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves. 791 792 Level: developer 793 794 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph() 795 @*/ 796 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf) 797 { 798 PetscFunctionBegin; 799 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 800 PetscSFCheckGraphSet(sf,1); 801 if (minleaf) *minleaf = sf->minleaf; 802 if (maxleaf) *maxleaf = sf->maxleaf; 803 PetscFunctionReturn(0); 804 } 805 806 /*@C 807 PetscSFViewFromOptions - View from Options 808 809 Collective on PetscSF 810 811 Input Parameters: 812 + A - the star forest 813 . obj - Optional object 814 - name - command line option 815 816 Level: intermediate 817 .seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate() 818 @*/ 819 PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[]) 820 { 821 PetscFunctionBegin; 822 PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1); 823 PetscCall(PetscObjectViewFromOptions((PetscObject)A,obj,name)); 824 PetscFunctionReturn(0); 825 } 826 827 /*@C 828 PetscSFView - view a star forest 829 830 Collective 831 832 Input Parameters: 833 + sf - star forest 834 - viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD 835 836 Level: beginner 837 838 .seealso: PetscSFCreate(), PetscSFSetGraph() 839 @*/ 840 PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer) 841 { 842 PetscBool iascii; 843 PetscViewerFormat format; 844 845 PetscFunctionBegin; 846 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 847 if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer)); 848 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 849 PetscCheckSameComm(sf,1,viewer,2); 850 if (sf->graphset) PetscCall(PetscSFSetUp(sf)); 851 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 852 if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) { 853 PetscMPIInt rank; 854 PetscInt ii,i,j; 855 856 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer)); 857 PetscCall(PetscViewerASCIIPushTab(viewer)); 858 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 859 if (!sf->graphset) { 860 PetscCall(PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n")); 861 PetscCall(PetscViewerASCIIPopTab(viewer)); 862 PetscFunctionReturn(0); 863 } 864 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank)); 865 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 866 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n",rank,sf->nroots,sf->nleaves,sf->nranks)); 867 for (i=0; i<sf->nleaves; i++) { 868 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index)); 869 } 870 PetscCall(PetscViewerFlush(viewer)); 871 PetscCall(PetscViewerGetFormat(viewer,&format)); 872 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 873 PetscMPIInt *tmpranks,*perm; 874 PetscCall(PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm)); 875 PetscCall(PetscArraycpy(tmpranks,sf->ranks,sf->nranks)); 876 for (i=0; i<sf->nranks; i++) perm[i] = i; 877 PetscCall(PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm)); 878 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank)); 879 for (ii=0; ii<sf->nranks; ii++) { 880 i = perm[ii]; 881 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %" PetscInt_FMT " edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i])); 882 for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) { 883 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n",rank,sf->rmine[j],sf->rremote[j])); 884 } 885 } 886 PetscCall(PetscFree2(tmpranks,perm)); 887 } 888 PetscCall(PetscViewerFlush(viewer)); 889 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 890 } 891 PetscCall(PetscViewerASCIIPopTab(viewer)); 892 } 893 if (sf->ops->View) PetscCall((*sf->ops->View)(sf,viewer)); 894 PetscFunctionReturn(0); 895 } 896 897 /*@C 898 PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process 899 900 Not Collective 901 902 Input Parameter: 903 . sf - star forest 904 905 Output Parameters: 906 + nranks - number of ranks referenced by local part 907 . ranks - array of ranks 908 . roffset - offset in rmine/rremote for each rank (length nranks+1) 909 . rmine - concatenated array holding local indices referencing each remote rank 910 - rremote - concatenated array holding remote indices referenced for each remote rank 911 912 Level: developer 913 914 .seealso: PetscSFGetLeafRanks() 915 @*/ 916 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote) 917 { 918 PetscFunctionBegin; 919 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 920 PetscCheck(sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 921 if (sf->ops->GetRootRanks) { 922 PetscCall((sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote)); 923 } else { 924 /* The generic implementation */ 925 if (nranks) *nranks = sf->nranks; 926 if (ranks) *ranks = sf->ranks; 927 if (roffset) *roffset = sf->roffset; 928 if (rmine) *rmine = sf->rmine; 929 if (rremote) *rremote = sf->rremote; 930 } 931 PetscFunctionReturn(0); 932 } 933 934 /*@C 935 PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 936 937 Not Collective 938 939 Input Parameter: 940 . sf - star forest 941 942 Output Parameters: 943 + niranks - number of leaf ranks referencing roots on this process 944 . iranks - array of ranks 945 . ioffset - offset in irootloc for each rank (length niranks+1) 946 - irootloc - concatenated array holding local indices of roots referenced by each leaf rank 947 948 Level: developer 949 950 .seealso: PetscSFGetRootRanks() 951 @*/ 952 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 953 { 954 PetscFunctionBegin; 955 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 956 PetscCheck(sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 957 if (sf->ops->GetLeafRanks) { 958 PetscCall((sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc)); 959 } else { 960 PetscSFType type; 961 PetscCall(PetscSFGetType(sf,&type)); 962 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 963 } 964 PetscFunctionReturn(0); 965 } 966 967 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) { 968 PetscInt i; 969 for (i=0; i<n; i++) { 970 if (needle == list[i]) return PETSC_TRUE; 971 } 972 return PETSC_FALSE; 973 } 974 975 /*@C 976 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations. 977 978 Collective 979 980 Input Parameters: 981 + sf - PetscSF to set up; PetscSFSetGraph() must have been called 982 - dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange) 983 984 Level: developer 985 986 .seealso: PetscSFGetRootRanks() 987 @*/ 988 PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup) 989 { 990 PetscTable table; 991 PetscTablePosition pos; 992 PetscMPIInt size,groupsize,*groupranks; 993 PetscInt *rcount,*ranks; 994 PetscInt i, irank = -1,orank = -1; 995 996 PetscFunctionBegin; 997 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 998 PetscSFCheckGraphSet(sf,1); 999 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size)); 1000 PetscCall(PetscTableCreate(10,size,&table)); 1001 for (i=0; i<sf->nleaves; i++) { 1002 /* Log 1-based rank */ 1003 PetscCall(PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES)); 1004 } 1005 PetscCall(PetscTableGetCount(table,&sf->nranks)); 1006 PetscCall(PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote)); 1007 PetscCall(PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks)); 1008 PetscCall(PetscTableGetHeadPosition(table,&pos)); 1009 for (i=0; i<sf->nranks; i++) { 1010 PetscCall(PetscTableGetNext(table,&pos,&ranks[i],&rcount[i])); 1011 ranks[i]--; /* Convert back to 0-based */ 1012 } 1013 PetscCall(PetscTableDestroy(&table)); 1014 1015 /* We expect that dgroup is reliably "small" while nranks could be large */ 1016 { 1017 MPI_Group group = MPI_GROUP_NULL; 1018 PetscMPIInt *dgroupranks; 1019 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group)); 1020 PetscCallMPI(MPI_Group_size(dgroup,&groupsize)); 1021 PetscCall(PetscMalloc1(groupsize,&dgroupranks)); 1022 PetscCall(PetscMalloc1(groupsize,&groupranks)); 1023 for (i=0; i<groupsize; i++) dgroupranks[i] = i; 1024 if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks)); 1025 PetscCallMPI(MPI_Group_free(&group)); 1026 PetscCall(PetscFree(dgroupranks)); 1027 } 1028 1029 /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */ 1030 for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) { 1031 for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */ 1032 if (InList(ranks[i],groupsize,groupranks)) break; 1033 } 1034 for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */ 1035 if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break; 1036 } 1037 if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */ 1038 PetscInt tmprank,tmpcount; 1039 1040 tmprank = ranks[i]; 1041 tmpcount = rcount[i]; 1042 ranks[i] = ranks[sf->ndranks]; 1043 rcount[i] = rcount[sf->ndranks]; 1044 ranks[sf->ndranks] = tmprank; 1045 rcount[sf->ndranks] = tmpcount; 1046 sf->ndranks++; 1047 } 1048 } 1049 PetscCall(PetscFree(groupranks)); 1050 PetscCall(PetscSortIntWithArray(sf->ndranks,ranks,rcount)); 1051 PetscCall(PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks)); 1052 sf->roffset[0] = 0; 1053 for (i=0; i<sf->nranks; i++) { 1054 PetscCall(PetscMPIIntCast(ranks[i],sf->ranks+i)); 1055 sf->roffset[i+1] = sf->roffset[i] + rcount[i]; 1056 rcount[i] = 0; 1057 } 1058 for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) { 1059 /* short circuit */ 1060 if (orank != sf->remote[i].rank) { 1061 /* Search for index of iremote[i].rank in sf->ranks */ 1062 PetscCall(PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank)); 1063 if (irank < 0) { 1064 PetscCall(PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank)); 1065 if (irank >= 0) irank += sf->ndranks; 1066 } 1067 orank = sf->remote[i].rank; 1068 } 1069 PetscCheck(irank >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %" PetscInt_FMT " in array",sf->remote[i].rank); 1070 sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i; 1071 sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index; 1072 rcount[irank]++; 1073 } 1074 PetscCall(PetscFree2(rcount,ranks)); 1075 PetscFunctionReturn(0); 1076 } 1077 1078 /*@C 1079 PetscSFGetGroups - gets incoming and outgoing process groups 1080 1081 Collective 1082 1083 Input Parameter: 1084 . sf - star forest 1085 1086 Output Parameters: 1087 + incoming - group of origin processes for incoming edges (leaves that reference my roots) 1088 - outgoing - group of destination processes for outgoing edges (roots that I reference) 1089 1090 Level: developer 1091 1092 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow() 1093 @*/ 1094 PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing) 1095 { 1096 MPI_Group group = MPI_GROUP_NULL; 1097 1098 PetscFunctionBegin; 1099 PetscCheck(sf->nranks >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups"); 1100 if (sf->ingroup == MPI_GROUP_NULL) { 1101 PetscInt i; 1102 const PetscInt *indegree; 1103 PetscMPIInt rank,*outranks,*inranks; 1104 PetscSFNode *remote; 1105 PetscSF bgcount; 1106 1107 /* Compute the number of incoming ranks */ 1108 PetscCall(PetscMalloc1(sf->nranks,&remote)); 1109 for (i=0; i<sf->nranks; i++) { 1110 remote[i].rank = sf->ranks[i]; 1111 remote[i].index = 0; 1112 } 1113 PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount)); 1114 PetscCall(PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER)); 1115 PetscCall(PetscSFComputeDegreeBegin(bgcount,&indegree)); 1116 PetscCall(PetscSFComputeDegreeEnd(bgcount,&indegree)); 1117 /* Enumerate the incoming ranks */ 1118 PetscCall(PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks)); 1119 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank)); 1120 for (i=0; i<sf->nranks; i++) outranks[i] = rank; 1121 PetscCall(PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks)); 1122 PetscCall(PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks)); 1123 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group)); 1124 PetscCallMPI(MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup)); 1125 PetscCallMPI(MPI_Group_free(&group)); 1126 PetscCall(PetscFree2(inranks,outranks)); 1127 PetscCall(PetscSFDestroy(&bgcount)); 1128 } 1129 *incoming = sf->ingroup; 1130 1131 if (sf->outgroup == MPI_GROUP_NULL) { 1132 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group)); 1133 PetscCallMPI(MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup)); 1134 PetscCallMPI(MPI_Group_free(&group)); 1135 } 1136 *outgoing = sf->outgroup; 1137 PetscFunctionReturn(0); 1138 } 1139 1140 /*@ 1141 PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters 1142 1143 Collective 1144 1145 Input Parameter: 1146 . sf - star forest that may contain roots with 0 or with more than 1 vertex 1147 1148 Output Parameter: 1149 . multi - star forest with split roots, such that each root has degree exactly 1 1150 1151 Level: developer 1152 1153 Notes: 1154 1155 In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi 1156 directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming 1157 edge, it is a candidate for future optimization that might involve its removal. 1158 1159 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering() 1160 @*/ 1161 PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi) 1162 { 1163 PetscFunctionBegin; 1164 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1165 PetscValidPointer(multi,2); 1166 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */ 1167 PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi)); 1168 *multi = sf->multi; 1169 sf->multi->multi = sf->multi; 1170 PetscFunctionReturn(0); 1171 } 1172 if (!sf->multi) { 1173 const PetscInt *indegree; 1174 PetscInt i,*inoffset,*outones,*outoffset,maxlocal; 1175 PetscSFNode *remote; 1176 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 1177 PetscCall(PetscSFComputeDegreeBegin(sf,&indegree)); 1178 PetscCall(PetscSFComputeDegreeEnd(sf,&indegree)); 1179 PetscCall(PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset)); 1180 inoffset[0] = 0; 1181 for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i]; 1182 for (i=0; i<maxlocal; i++) outones[i] = 1; 1183 PetscCall(PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM)); 1184 PetscCall(PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM)); 1185 for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1186 if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */ 1187 for (i=0; i<sf->nroots; i++) { 1188 PetscCheck(inoffset[i] + indegree[i] == inoffset[i+1],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp"); 1189 } 1190 } 1191 PetscCall(PetscMalloc1(sf->nleaves,&remote)); 1192 for (i=0; i<sf->nleaves; i++) { 1193 remote[i].rank = sf->remote[i].rank; 1194 remote[i].index = outoffset[sf->mine ? sf->mine[i] : i]; 1195 } 1196 PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi)); 1197 sf->multi->multi = sf->multi; 1198 PetscCall(PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER)); 1199 if (sf->rankorder) { /* Sort the ranks */ 1200 PetscMPIInt rank; 1201 PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree; 1202 PetscSFNode *newremote; 1203 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank)); 1204 for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]); 1205 PetscCall(PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset)); 1206 for (i=0; i<maxlocal; i++) outranks[i] = rank; 1207 PetscCall(PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE)); 1208 PetscCall(PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE)); 1209 /* Sort the incoming ranks at each vertex, build the inverse map */ 1210 for (i=0; i<sf->nroots; i++) { 1211 PetscInt j; 1212 for (j=0; j<indegree[i]; j++) tmpoffset[j] = j; 1213 PetscCall(PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset)); 1214 for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1215 } 1216 PetscCall(PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE)); 1217 PetscCall(PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE)); 1218 PetscCall(PetscMalloc1(sf->nleaves,&newremote)); 1219 for (i=0; i<sf->nleaves; i++) { 1220 newremote[i].rank = sf->remote[i].rank; 1221 newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i]; 1222 } 1223 PetscCall(PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER)); 1224 PetscCall(PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset)); 1225 } 1226 PetscCall(PetscFree3(inoffset,outones,outoffset)); 1227 } 1228 *multi = sf->multi; 1229 PetscFunctionReturn(0); 1230 } 1231 1232 /*@C 1233 PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices 1234 1235 Collective 1236 1237 Input Parameters: 1238 + sf - original star forest 1239 . nselected - number of selected roots on this process 1240 - selected - indices of the selected roots on this process 1241 1242 Output Parameter: 1243 . esf - new star forest 1244 1245 Level: advanced 1246 1247 Note: 1248 To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can 1249 be done by calling PetscSFGetGraph(). 1250 1251 .seealso: PetscSFSetGraph(), PetscSFGetGraph() 1252 @*/ 1253 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf) 1254 { 1255 PetscInt i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal; 1256 const PetscInt *ilocal; 1257 signed char *rootdata,*leafdata,*leafmem; 1258 const PetscSFNode *iremote; 1259 PetscSFNode *new_iremote; 1260 MPI_Comm comm; 1261 1262 PetscFunctionBegin; 1263 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1264 PetscSFCheckGraphSet(sf,1); 1265 if (nselected) PetscValidIntPointer(selected,3); 1266 PetscValidPointer(esf,4); 1267 1268 PetscCall(PetscSFSetUp(sf)); 1269 PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0)); 1270 PetscCall(PetscObjectGetComm((PetscObject)sf,&comm)); 1271 PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote)); 1272 1273 if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */ 1274 PetscBool dups; 1275 PetscCall(PetscCheckDupsInt(nselected,selected,&dups)); 1276 PetscCheck(!dups,comm,PETSC_ERR_ARG_WRONG,"selected[] has dups"); 1277 for (i=0; i<nselected; i++) 1278 PetscCheck(selected[i] >= 0 && selected[i] < nroots,comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")",selected[i],nroots); 1279 } 1280 1281 if (sf->ops->CreateEmbeddedRootSF) { 1282 PetscCall((*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf)); 1283 } else { 1284 /* A generic version of creating embedded sf */ 1285 PetscCall(PetscSFGetLeafRange(sf,&minleaf,&maxleaf)); 1286 maxlocal = maxleaf - minleaf + 1; 1287 PetscCall(PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem)); 1288 leafdata = leafmem - minleaf; 1289 /* Tag selected roots and bcast to leaves */ 1290 for (i=0; i<nselected; i++) rootdata[selected[i]] = 1; 1291 PetscCall(PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE)); 1292 PetscCall(PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE)); 1293 1294 /* Build esf with leaves that are still connected */ 1295 esf_nleaves = 0; 1296 for (i=0; i<nleaves; i++) { 1297 j = ilocal ? ilocal[i] : i; 1298 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1299 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1300 */ 1301 esf_nleaves += (leafdata[j] ? 1 : 0); 1302 } 1303 PetscCall(PetscMalloc1(esf_nleaves,&new_ilocal)); 1304 PetscCall(PetscMalloc1(esf_nleaves,&new_iremote)); 1305 for (i=n=0; i<nleaves; i++) { 1306 j = ilocal ? ilocal[i] : i; 1307 if (leafdata[j]) { 1308 new_ilocal[n] = j; 1309 new_iremote[n].rank = iremote[i].rank; 1310 new_iremote[n].index = iremote[i].index; 1311 ++n; 1312 } 1313 } 1314 PetscCall(PetscSFCreate(comm,esf)); 1315 PetscCall(PetscSFSetFromOptions(*esf)); 1316 PetscCall(PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER)); 1317 PetscCall(PetscFree2(rootdata,leafmem)); 1318 } 1319 PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0)); 1320 PetscFunctionReturn(0); 1321 } 1322 1323 /*@C 1324 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1325 1326 Collective 1327 1328 Input Parameters: 1329 + sf - original star forest 1330 . nselected - number of selected leaves on this process 1331 - selected - indices of the selected leaves on this process 1332 1333 Output Parameter: 1334 . newsf - new star forest 1335 1336 Level: advanced 1337 1338 .seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph() 1339 @*/ 1340 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1341 { 1342 const PetscSFNode *iremote; 1343 PetscSFNode *new_iremote; 1344 const PetscInt *ilocal; 1345 PetscInt i,nroots,*leaves,*new_ilocal; 1346 MPI_Comm comm; 1347 1348 PetscFunctionBegin; 1349 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1350 PetscSFCheckGraphSet(sf,1); 1351 if (nselected) PetscValidIntPointer(selected,3); 1352 PetscValidPointer(newsf,4); 1353 1354 /* Uniq selected[] and put results in leaves[] */ 1355 PetscCall(PetscObjectGetComm((PetscObject)sf,&comm)); 1356 PetscCall(PetscMalloc1(nselected,&leaves)); 1357 PetscCall(PetscArraycpy(leaves,selected,nselected)); 1358 PetscCall(PetscSortedRemoveDupsInt(&nselected,leaves)); 1359 PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves),comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")",leaves[0],leaves[nselected-1],sf->nleaves); 1360 1361 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1362 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) { 1363 PetscCall((*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf)); 1364 } else { 1365 PetscCall(PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote)); 1366 PetscCall(PetscMalloc1(nselected,&new_ilocal)); 1367 PetscCall(PetscMalloc1(nselected,&new_iremote)); 1368 for (i=0; i<nselected; ++i) { 1369 const PetscInt l = leaves[i]; 1370 new_ilocal[i] = ilocal ? ilocal[l] : l; 1371 new_iremote[i].rank = iremote[l].rank; 1372 new_iremote[i].index = iremote[l].index; 1373 } 1374 PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf)); 1375 PetscCall(PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER)); 1376 } 1377 PetscCall(PetscFree(leaves)); 1378 PetscFunctionReturn(0); 1379 } 1380 1381 /*@C 1382 PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd() 1383 1384 Collective on PetscSF 1385 1386 Input Parameters: 1387 + sf - star forest on which to communicate 1388 . unit - data type associated with each node 1389 . rootdata - buffer to broadcast 1390 - op - operation to use for reduction 1391 1392 Output Parameter: 1393 . leafdata - buffer to be reduced with values from each leaf's respective root 1394 1395 Level: intermediate 1396 1397 Notes: 1398 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1399 are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1400 use PetscSFBcastWithMemTypeBegin() instead. 1401 .seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin() 1402 @*/ 1403 PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1404 { 1405 PetscMemType rootmtype,leafmtype; 1406 1407 PetscFunctionBegin; 1408 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1409 PetscCall(PetscSFSetUp(sf)); 1410 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0)); 1411 PetscCall(PetscGetMemType(rootdata,&rootmtype)); 1412 PetscCall(PetscGetMemType(leafdata,&leafmtype)); 1413 PetscCall((*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op)); 1414 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0)); 1415 PetscFunctionReturn(0); 1416 } 1417 1418 /*@C 1419 PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd() 1420 1421 Collective on PetscSF 1422 1423 Input Parameters: 1424 + sf - star forest on which to communicate 1425 . unit - data type associated with each node 1426 . rootmtype - memory type of rootdata 1427 . rootdata - buffer to broadcast 1428 . leafmtype - memory type of leafdata 1429 - op - operation to use for reduction 1430 1431 Output Parameter: 1432 . leafdata - buffer to be reduced with values from each leaf's respective root 1433 1434 Level: intermediate 1435 1436 .seealso: PetscSFBcastEnd(), PetscSFBcastBegin() 1437 @*/ 1438 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 1439 { 1440 PetscFunctionBegin; 1441 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1442 PetscCall(PetscSFSetUp(sf)); 1443 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0)); 1444 PetscCall((*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op)); 1445 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0)); 1446 PetscFunctionReturn(0); 1447 } 1448 1449 /*@C 1450 PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin() 1451 1452 Collective 1453 1454 Input Parameters: 1455 + sf - star forest 1456 . unit - data type 1457 . rootdata - buffer to broadcast 1458 - op - operation to use for reduction 1459 1460 Output Parameter: 1461 . leafdata - buffer to be reduced with values from each leaf's respective root 1462 1463 Level: intermediate 1464 1465 .seealso: PetscSFSetGraph(), PetscSFReduceEnd() 1466 @*/ 1467 PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1468 { 1469 PetscFunctionBegin; 1470 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1471 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0)); 1472 PetscCall((*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op)); 1473 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0)); 1474 PetscFunctionReturn(0); 1475 } 1476 1477 /*@C 1478 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 1479 1480 Collective 1481 1482 Input Parameters: 1483 + sf - star forest 1484 . unit - data type 1485 . leafdata - values to reduce 1486 - op - reduction operation 1487 1488 Output Parameter: 1489 . rootdata - result of reduction of values from all leaves of each root 1490 1491 Level: intermediate 1492 1493 Notes: 1494 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1495 are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1496 use PetscSFReduceWithMemTypeBegin() instead. 1497 1498 .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin() 1499 @*/ 1500 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1501 { 1502 PetscMemType rootmtype,leafmtype; 1503 1504 PetscFunctionBegin; 1505 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1506 PetscCall(PetscSFSetUp(sf)); 1507 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0)); 1508 PetscCall(PetscGetMemType(rootdata,&rootmtype)); 1509 PetscCall(PetscGetMemType(leafdata,&leafmtype)); 1510 PetscCall((sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op)); 1511 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0)); 1512 PetscFunctionReturn(0); 1513 } 1514 1515 /*@C 1516 PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd() 1517 1518 Collective 1519 1520 Input Parameters: 1521 + sf - star forest 1522 . unit - data type 1523 . leafmtype - memory type of leafdata 1524 . leafdata - values to reduce 1525 . rootmtype - memory type of rootdata 1526 - op - reduction operation 1527 1528 Output Parameter: 1529 . rootdata - result of reduction of values from all leaves of each root 1530 1531 Level: intermediate 1532 1533 .seealso: PetscSFBcastBegin(), PetscSFReduceBegin() 1534 @*/ 1535 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 1536 { 1537 PetscFunctionBegin; 1538 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1539 PetscCall(PetscSFSetUp(sf)); 1540 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0)); 1541 PetscCall((sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op)); 1542 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0)); 1543 PetscFunctionReturn(0); 1544 } 1545 1546 /*@C 1547 PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin() 1548 1549 Collective 1550 1551 Input Parameters: 1552 + sf - star forest 1553 . unit - data type 1554 . leafdata - values to reduce 1555 - op - reduction operation 1556 1557 Output Parameter: 1558 . rootdata - result of reduction of values from all leaves of each root 1559 1560 Level: intermediate 1561 1562 .seealso: PetscSFSetGraph(), PetscSFBcastEnd() 1563 @*/ 1564 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1565 { 1566 PetscFunctionBegin; 1567 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1568 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0)); 1569 PetscCall((*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op)); 1570 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0)); 1571 PetscFunctionReturn(0); 1572 } 1573 1574 /*@C 1575 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd() 1576 1577 Collective 1578 1579 Input Parameters: 1580 + sf - star forest 1581 . unit - data type 1582 . leafdata - leaf values to use in reduction 1583 - op - operation to use for reduction 1584 1585 Output Parameters: 1586 + rootdata - root values to be updated, input state is seen by first process to perform an update 1587 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1588 1589 Level: advanced 1590 1591 Note: 1592 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1593 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1594 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1595 integers. 1596 1597 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph() 1598 @*/ 1599 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1600 { 1601 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1602 1603 PetscFunctionBegin; 1604 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1605 PetscCall(PetscSFSetUp(sf)); 1606 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0)); 1607 PetscCall(PetscGetMemType(rootdata,&rootmtype)); 1608 PetscCall(PetscGetMemType(leafdata,&leafmtype)); 1609 PetscCall(PetscGetMemType(leafupdate,&leafupdatemtype)); 1610 PetscCheck(leafmtype == leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1611 PetscCall((*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op)); 1612 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0)); 1613 PetscFunctionReturn(0); 1614 } 1615 1616 /*@C 1617 PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd() 1618 1619 Collective 1620 1621 Input Parameters: 1622 + sf - star forest 1623 . unit - data type 1624 . rootmtype - memory type of rootdata 1625 . leafmtype - memory type of leafdata 1626 . leafdata - leaf values to use in reduction 1627 . leafupdatemtype - memory type of leafupdate 1628 - op - operation to use for reduction 1629 1630 Output Parameters: 1631 + rootdata - root values to be updated, input state is seen by first process to perform an update 1632 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1633 1634 Level: advanced 1635 1636 Note: See PetscSFFetchAndOpBegin() for more details. 1637 1638 .seealso: PetscSFFetchAndOpBegin(),PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph() 1639 @*/ 1640 PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscMemType leafupdatemtype,void *leafupdate,MPI_Op op) 1641 { 1642 PetscFunctionBegin; 1643 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1644 PetscCall(PetscSFSetUp(sf)); 1645 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0)); 1646 PetscCheck(leafmtype == leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1647 PetscCall((*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op)); 1648 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0)); 1649 PetscFunctionReturn(0); 1650 } 1651 1652 /*@C 1653 PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value 1654 1655 Collective 1656 1657 Input Parameters: 1658 + sf - star forest 1659 . unit - data type 1660 . leafdata - leaf values to use in reduction 1661 - op - operation to use for reduction 1662 1663 Output Parameters: 1664 + rootdata - root values to be updated, input state is seen by first process to perform an update 1665 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1666 1667 Level: advanced 1668 1669 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph() 1670 @*/ 1671 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1672 { 1673 PetscFunctionBegin; 1674 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1675 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0)); 1676 PetscCall((*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op)); 1677 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0)); 1678 PetscFunctionReturn(0); 1679 } 1680 1681 /*@C 1682 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd() 1683 1684 Collective 1685 1686 Input Parameter: 1687 . sf - star forest 1688 1689 Output Parameter: 1690 . degree - degree of each root vertex 1691 1692 Level: advanced 1693 1694 Notes: 1695 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 1696 1697 .seealso: PetscSFGatherBegin() 1698 @*/ 1699 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree) 1700 { 1701 PetscFunctionBegin; 1702 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1703 PetscSFCheckGraphSet(sf,1); 1704 PetscValidPointer(degree,2); 1705 if (!sf->degreeknown) { 1706 PetscInt i, nroots = sf->nroots, maxlocal; 1707 PetscCheck(!sf->degree,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1708 maxlocal = sf->maxleaf-sf->minleaf+1; 1709 PetscCall(PetscMalloc1(nroots,&sf->degree)); 1710 PetscCall(PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1711 for (i=0; i<nroots; i++) sf->degree[i] = 0; 1712 for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1; 1713 PetscCall(PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM)); 1714 } 1715 *degree = NULL; 1716 PetscFunctionReturn(0); 1717 } 1718 1719 /*@C 1720 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin() 1721 1722 Collective 1723 1724 Input Parameter: 1725 . sf - star forest 1726 1727 Output Parameter: 1728 . degree - degree of each root vertex 1729 1730 Level: developer 1731 1732 Notes: 1733 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 1734 1735 .seealso: 1736 @*/ 1737 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree) 1738 { 1739 PetscFunctionBegin; 1740 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1741 PetscSFCheckGraphSet(sf,1); 1742 PetscValidPointer(degree,2); 1743 if (!sf->degreeknown) { 1744 PetscCheck(sf->degreetmp,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 1745 PetscCall(PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM)); 1746 PetscCall(PetscFree(sf->degreetmp)); 1747 sf->degreeknown = PETSC_TRUE; 1748 } 1749 *degree = sf->degree; 1750 PetscFunctionReturn(0); 1751 } 1752 1753 /*@C 1754 PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()). 1755 Each multi-root is assigned index of the corresponding original root. 1756 1757 Collective 1758 1759 Input Parameters: 1760 + sf - star forest 1761 - degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd() 1762 1763 Output Parameters: 1764 + nMultiRoots - (optional) number of multi-roots (roots of multi-SF) 1765 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots 1766 1767 Level: developer 1768 1769 Notes: 1770 The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed. 1771 1772 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF() 1773 @*/ 1774 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1775 { 1776 PetscSF msf; 1777 PetscInt i, j, k, nroots, nmroots; 1778 1779 PetscFunctionBegin; 1780 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1781 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL)); 1782 if (nroots) PetscValidIntPointer(degree,2); 1783 if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3); 1784 PetscValidPointer(multiRootsOrigNumbering,4); 1785 PetscCall(PetscSFGetMultiSF(sf,&msf)); 1786 PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL)); 1787 PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering)); 1788 for (i=0,j=0,k=0; i<nroots; i++) { 1789 if (!degree[i]) continue; 1790 for (j=0; j<degree[i]; j++,k++) { 1791 (*multiRootsOrigNumbering)[k] = i; 1792 } 1793 } 1794 PetscCheck(k == nmroots,PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail"); 1795 if (nMultiRoots) *nMultiRoots = nmroots; 1796 PetscFunctionReturn(0); 1797 } 1798 1799 /*@C 1800 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 1801 1802 Collective 1803 1804 Input Parameters: 1805 + sf - star forest 1806 . unit - data type 1807 - leafdata - leaf data to gather to roots 1808 1809 Output Parameter: 1810 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1811 1812 Level: intermediate 1813 1814 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1815 @*/ 1816 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1817 { 1818 PetscSF multi = NULL; 1819 1820 PetscFunctionBegin; 1821 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1822 PetscCall(PetscSFSetUp(sf)); 1823 PetscCall(PetscSFGetMultiSF(sf,&multi)); 1824 PetscCall(PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE)); 1825 PetscFunctionReturn(0); 1826 } 1827 1828 /*@C 1829 PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin() 1830 1831 Collective 1832 1833 Input Parameters: 1834 + sf - star forest 1835 . unit - data type 1836 - leafdata - leaf data to gather to roots 1837 1838 Output Parameter: 1839 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1840 1841 Level: intermediate 1842 1843 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1844 @*/ 1845 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1846 { 1847 PetscSF multi = NULL; 1848 1849 PetscFunctionBegin; 1850 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1851 PetscCall(PetscSFGetMultiSF(sf,&multi)); 1852 PetscCall(PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE)); 1853 PetscFunctionReturn(0); 1854 } 1855 1856 /*@C 1857 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd() 1858 1859 Collective 1860 1861 Input Parameters: 1862 + sf - star forest 1863 . unit - data type 1864 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1865 1866 Output Parameter: 1867 . leafdata - leaf data to be update with personal data from each respective root 1868 1869 Level: intermediate 1870 1871 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1872 @*/ 1873 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1874 { 1875 PetscSF multi = NULL; 1876 1877 PetscFunctionBegin; 1878 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1879 PetscCall(PetscSFSetUp(sf)); 1880 PetscCall(PetscSFGetMultiSF(sf,&multi)); 1881 PetscCall(PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE)); 1882 PetscFunctionReturn(0); 1883 } 1884 1885 /*@C 1886 PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin() 1887 1888 Collective 1889 1890 Input Parameters: 1891 + sf - star forest 1892 . unit - data type 1893 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1894 1895 Output Parameter: 1896 . leafdata - leaf data to be update with personal data from each respective root 1897 1898 Level: intermediate 1899 1900 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1901 @*/ 1902 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1903 { 1904 PetscSF multi = NULL; 1905 1906 PetscFunctionBegin; 1907 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1908 PetscCall(PetscSFGetMultiSF(sf,&multi)); 1909 PetscCall(PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE)); 1910 PetscFunctionReturn(0); 1911 } 1912 1913 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 1914 { 1915 PetscInt i, n, nleaves; 1916 const PetscInt *ilocal = NULL; 1917 PetscHSetI seen; 1918 1919 PetscFunctionBegin; 1920 if (PetscDefined(USE_DEBUG)) { 1921 PetscCall(PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL)); 1922 PetscCall(PetscHSetICreate(&seen)); 1923 for (i = 0; i < nleaves; i++) { 1924 const PetscInt leaf = ilocal ? ilocal[i] : i; 1925 PetscCall(PetscHSetIAdd(seen,leaf)); 1926 } 1927 PetscCall(PetscHSetIGetSize(seen,&n)); 1928 PetscCheck(n == nleaves,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique"); 1929 PetscCall(PetscHSetIDestroy(&seen)); 1930 } 1931 PetscFunctionReturn(0); 1932 } 1933 1934 /*@ 1935 PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view 1936 1937 Input Parameters: 1938 + sfA - The first PetscSF 1939 - sfB - The second PetscSF 1940 1941 Output Parameters: 1942 . sfBA - The composite SF 1943 1944 Level: developer 1945 1946 Notes: 1947 Currently, the two SFs must be defined on congruent communicators and they must be true star 1948 forests, i.e. the same leaf is not connected with different roots. 1949 1950 sfA's leaf space and sfB's root space might be partially overlapped. The composition builds 1951 a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected 1952 nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a 1953 Bcast on sfA, then a Bcast on sfB, on connected nodes. 1954 1955 .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph() 1956 @*/ 1957 PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA) 1958 { 1959 const PetscSFNode *remotePointsA,*remotePointsB; 1960 PetscSFNode *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB; 1961 const PetscInt *localPointsA,*localPointsB; 1962 PetscInt *localPointsBA; 1963 PetscInt i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA; 1964 PetscBool denseB; 1965 1966 PetscFunctionBegin; 1967 PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1); 1968 PetscSFCheckGraphSet(sfA,1); 1969 PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2); 1970 PetscSFCheckGraphSet(sfB,2); 1971 PetscCheckSameComm(sfA,1,sfB,2); 1972 PetscValidPointer(sfBA,3); 1973 PetscCall(PetscSFCheckLeavesUnique_Private(sfA)); 1974 PetscCall(PetscSFCheckLeavesUnique_Private(sfB)); 1975 1976 PetscCall(PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA)); 1977 PetscCall(PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB)); 1978 if (localPointsA) { 1979 PetscCall(PetscMalloc1(numRootsB,&reorderedRemotePointsA)); 1980 for (i=0; i<numRootsB; i++) { 1981 reorderedRemotePointsA[i].rank = -1; 1982 reorderedRemotePointsA[i].index = -1; 1983 } 1984 for (i=0; i<numLeavesA; i++) { 1985 if (localPointsA[i] >= numRootsB) continue; 1986 reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i]; 1987 } 1988 remotePointsA = reorderedRemotePointsA; 1989 } 1990 PetscCall(PetscSFGetLeafRange(sfB,&minleaf,&maxleaf)); 1991 PetscCall(PetscMalloc1(maxleaf-minleaf+1,&leafdataB)); 1992 PetscCall(PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE)); 1993 PetscCall(PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE)); 1994 PetscCall(PetscFree(reorderedRemotePointsA)); 1995 1996 denseB = (PetscBool)!localPointsB; 1997 for (i=0,numLeavesBA=0; i<numLeavesB; i++) { 1998 if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE; 1999 else numLeavesBA++; 2000 } 2001 if (denseB) { 2002 localPointsBA = NULL; 2003 remotePointsBA = leafdataB; 2004 } else { 2005 PetscCall(PetscMalloc1(numLeavesBA,&localPointsBA)); 2006 PetscCall(PetscMalloc1(numLeavesBA,&remotePointsBA)); 2007 for (i=0,numLeavesBA=0; i<numLeavesB; i++) { 2008 const PetscInt l = localPointsB ? localPointsB[i] : i; 2009 2010 if (leafdataB[l-minleaf].rank == -1) continue; 2011 remotePointsBA[numLeavesBA] = leafdataB[l-minleaf]; 2012 localPointsBA[numLeavesBA] = l; 2013 numLeavesBA++; 2014 } 2015 PetscCall(PetscFree(leafdataB)); 2016 } 2017 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA)); 2018 PetscCall(PetscSFSetFromOptions(*sfBA)); 2019 PetscCall(PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER)); 2020 PetscFunctionReturn(0); 2021 } 2022 2023 /*@ 2024 PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one 2025 2026 Input Parameters: 2027 + sfA - The first PetscSF 2028 - sfB - The second PetscSF 2029 2030 Output Parameters: 2031 . sfBA - The composite SF. 2032 2033 Level: developer 2034 2035 Notes: 2036 Currently, the two SFs must be defined on congruent communicators and they must be true star 2037 forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the 2038 second SF must have a degree of 1, i.e., no roots have more than one leaf connected. 2039 2040 sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds 2041 a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected 2042 roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then 2043 a Reduce on sfB, on connected roots. 2044 2045 .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF() 2046 @*/ 2047 PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA) 2048 { 2049 const PetscSFNode *remotePointsA,*remotePointsB; 2050 PetscSFNode *remotePointsBA; 2051 const PetscInt *localPointsA,*localPointsB; 2052 PetscSFNode *reorderedRemotePointsA = NULL; 2053 PetscInt i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA; 2054 MPI_Op op; 2055 #if defined(PETSC_USE_64BIT_INDICES) 2056 PetscBool iswin; 2057 #endif 2058 2059 PetscFunctionBegin; 2060 PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1); 2061 PetscSFCheckGraphSet(sfA,1); 2062 PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2); 2063 PetscSFCheckGraphSet(sfB,2); 2064 PetscCheckSameComm(sfA,1,sfB,2); 2065 PetscValidPointer(sfBA,3); 2066 PetscCall(PetscSFCheckLeavesUnique_Private(sfA)); 2067 PetscCall(PetscSFCheckLeavesUnique_Private(sfB)); 2068 2069 PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA)); 2070 PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB)); 2071 2072 /* TODO: Check roots of sfB have degree of 1 */ 2073 /* Once we implement it, we can replace the MPI_MAXLOC 2074 with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect. 2075 We use MPI_MAXLOC only to have a deterministic output from this routine if 2076 the root condition is not meet. 2077 */ 2078 op = MPI_MAXLOC; 2079 #if defined(PETSC_USE_64BIT_INDICES) 2080 /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */ 2081 PetscCall(PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin)); 2082 if (iswin) op = MPI_REPLACE; 2083 #endif 2084 2085 PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf)); 2086 PetscCall(PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA)); 2087 for (i=0; i<maxleaf - minleaf + 1; i++) { 2088 reorderedRemotePointsA[i].rank = -1; 2089 reorderedRemotePointsA[i].index = -1; 2090 } 2091 if (localPointsA) { 2092 for (i=0; i<numLeavesA; i++) { 2093 if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; 2094 reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i]; 2095 } 2096 } else { 2097 for (i=0; i<numLeavesA; i++) { 2098 if (i > maxleaf || i < minleaf) continue; 2099 reorderedRemotePointsA[i - minleaf] = remotePointsA[i]; 2100 } 2101 } 2102 2103 PetscCall(PetscMalloc1(numRootsB,&localPointsBA)); 2104 PetscCall(PetscMalloc1(numRootsB,&remotePointsBA)); 2105 for (i=0; i<numRootsB; i++) { 2106 remotePointsBA[i].rank = -1; 2107 remotePointsBA[i].index = -1; 2108 } 2109 2110 PetscCall(PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op)); 2111 PetscCall(PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op)); 2112 PetscCall(PetscFree(reorderedRemotePointsA)); 2113 for (i=0,numLeavesBA=0; i<numRootsB; i++) { 2114 if (remotePointsBA[i].rank == -1) continue; 2115 remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank; 2116 remotePointsBA[numLeavesBA].index = remotePointsBA[i].index; 2117 localPointsBA[numLeavesBA] = i; 2118 numLeavesBA++; 2119 } 2120 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA)); 2121 PetscCall(PetscSFSetFromOptions(*sfBA)); 2122 PetscCall(PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER)); 2123 PetscFunctionReturn(0); 2124 } 2125 2126 /* 2127 PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF 2128 2129 Input Parameters: 2130 . sf - The global PetscSF 2131 2132 Output Parameters: 2133 . out - The local PetscSF 2134 */ 2135 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out) 2136 { 2137 MPI_Comm comm; 2138 PetscMPIInt myrank; 2139 const PetscInt *ilocal; 2140 const PetscSFNode *iremote; 2141 PetscInt i,j,nroots,nleaves,lnleaves,*lilocal; 2142 PetscSFNode *liremote; 2143 PetscSF lsf; 2144 2145 PetscFunctionBegin; 2146 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 2147 if (sf->ops->CreateLocalSF) { 2148 PetscCall((*sf->ops->CreateLocalSF)(sf,out)); 2149 } else { 2150 /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */ 2151 PetscCall(PetscObjectGetComm((PetscObject)sf,&comm)); 2152 PetscCallMPI(MPI_Comm_rank(comm,&myrank)); 2153 2154 /* Find out local edges and build a local SF */ 2155 PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote)); 2156 for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;} 2157 PetscCall(PetscMalloc1(lnleaves,&lilocal)); 2158 PetscCall(PetscMalloc1(lnleaves,&liremote)); 2159 2160 for (i=j=0; i<nleaves; i++) { 2161 if (iremote[i].rank == (PetscInt)myrank) { 2162 lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */ 2163 liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */ 2164 liremote[j].index = iremote[i].index; 2165 j++; 2166 } 2167 } 2168 PetscCall(PetscSFCreate(PETSC_COMM_SELF,&lsf)); 2169 PetscCall(PetscSFSetFromOptions(lsf)); 2170 PetscCall(PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER)); 2171 PetscCall(PetscSFSetUp(lsf)); 2172 *out = lsf; 2173 } 2174 PetscFunctionReturn(0); 2175 } 2176 2177 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */ 2178 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata) 2179 { 2180 PetscMemType rootmtype,leafmtype; 2181 2182 PetscFunctionBegin; 2183 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 2184 PetscCall(PetscSFSetUp(sf)); 2185 PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0)); 2186 PetscCall(PetscGetMemType(rootdata,&rootmtype)); 2187 PetscCall(PetscGetMemType(leafdata,&leafmtype)); 2188 if (sf->ops->BcastToZero) { 2189 PetscCall((*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata)); 2190 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type"); 2191 PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0)); 2192 PetscFunctionReturn(0); 2193 } 2194 2195 /*@ 2196 PetscSFConcatenate - concatenate multiple SFs into one 2197 2198 Input Parameters: 2199 + comm - the communicator 2200 . nsfs - the number of input PetscSF 2201 . sfs - the array of input PetscSF 2202 . shareRoots - the flag whether roots of input PetscSFs are taken as shared (PETSC_TRUE), or separate and concatenated (PETSC_FALSE) 2203 - leafOffsets - the array of local leaf offsets, one for each input PetscSF, or NULL for contiguous storage 2204 2205 Output Parameters: 2206 . newsf - The resulting PetscSF 2207 2208 Level: developer 2209 2210 Notes: 2211 The communicator of all SFs in sfs must be comm. 2212 2213 The offsets in leafOffsets are added to the original leaf indices. 2214 2215 If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well. 2216 In this case, NULL is also passed as ilocal to the resulting SF. 2217 2218 If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs. 2219 In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs). 2220 2221 .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph() 2222 @*/ 2223 PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscBool shareRoots, PetscInt leafOffsets[], PetscSF *newsf) 2224 { 2225 PetscInt i, s, nLeaves, nRoots; 2226 PetscInt *leafArrayOffsets; 2227 PetscInt *ilocal_new; 2228 PetscSFNode *iremote_new; 2229 PetscInt *rootOffsets; 2230 PetscBool all_ilocal_null = PETSC_FALSE; 2231 PetscMPIInt rank; 2232 2233 PetscFunctionBegin; 2234 { 2235 PetscSF dummy; /* just to have a PetscObject on comm for input validation */ 2236 2237 PetscCall(PetscSFCreate(comm,&dummy)); 2238 PetscValidLogicalCollectiveInt(dummy,nsfs,2); 2239 PetscValidPointer(sfs,3); 2240 for (i=0; i<nsfs; i++) { 2241 PetscValidHeaderSpecific(sfs[i],PETSCSF_CLASSID,3); 2242 PetscCheckSameComm(dummy,1,sfs[i],3); 2243 } 2244 PetscValidLogicalCollectiveBool(dummy,shareRoots,4); 2245 if (leafOffsets) PetscValidIntPointer(leafOffsets,5); 2246 PetscValidPointer(newsf,6); 2247 PetscCall(PetscSFDestroy(&dummy)); 2248 } 2249 if (!nsfs) { 2250 PetscCall(PetscSFCreate(comm, newsf)); 2251 PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 2252 PetscFunctionReturn(0); 2253 } 2254 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2255 2256 PetscCall(PetscCalloc1(nsfs+1, &rootOffsets)); 2257 if (shareRoots) { 2258 PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL)); 2259 if (PetscDefined(USE_DEBUG)) { 2260 for (s=1; s<nsfs; s++) { 2261 PetscInt nr; 2262 2263 PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL)); 2264 PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "shareRoots = PETSC_TRUE but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", s, nr, nRoots); 2265 } 2266 } 2267 } else { 2268 for (s=0; s<nsfs; s++) { 2269 PetscInt nr; 2270 2271 PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL)); 2272 rootOffsets[s+1] = rootOffsets[s] + nr; 2273 } 2274 nRoots = rootOffsets[nsfs]; 2275 } 2276 2277 /* Calculate leaf array offsets and automatic root offsets */ 2278 PetscCall(PetscMalloc1(nsfs+1,&leafArrayOffsets)); 2279 leafArrayOffsets[0] = 0; 2280 for (s=0; s<nsfs; s++) { 2281 PetscInt nl; 2282 2283 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL)); 2284 leafArrayOffsets[s+1] = leafArrayOffsets[s] + nl; 2285 } 2286 nLeaves = leafArrayOffsets[nsfs]; 2287 2288 if (!leafOffsets) { 2289 all_ilocal_null = PETSC_TRUE; 2290 for (s=0; s<nsfs; s++) { 2291 const PetscInt *ilocal; 2292 2293 PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL)); 2294 if (ilocal) { 2295 all_ilocal_null = PETSC_FALSE; 2296 break; 2297 } 2298 } 2299 PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL"); 2300 } 2301 2302 /* Renumber and concatenate local leaves */ 2303 ilocal_new = NULL; 2304 if (!all_ilocal_null) { 2305 PetscCall(PetscMalloc1(nLeaves, &ilocal_new)); 2306 for (i = 0; i<nLeaves; i++) ilocal_new[i] = -1; 2307 for (s = 0; s<nsfs; s++) { 2308 const PetscInt *ilocal; 2309 PetscInt *ilocal_l = &ilocal_new[leafArrayOffsets[s]]; 2310 PetscInt i, nleaves_l; 2311 2312 PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL)); 2313 for (i=0; i<nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s]; 2314 } 2315 } 2316 2317 /* Renumber and concatenate remote roots */ 2318 PetscCall(PetscMalloc1(nLeaves, &iremote_new)); 2319 for (i = 0; i < nLeaves; i++) { 2320 iremote_new[i].rank = -1; 2321 iremote_new[i].index = -1; 2322 } 2323 for (s = 0; s<nsfs; s++) { 2324 PetscInt i, nl, nr; 2325 PetscSF tmp_sf; 2326 const PetscSFNode *iremote; 2327 PetscSFNode *tmp_rootdata; 2328 PetscSFNode *tmp_leafdata = &iremote_new[leafArrayOffsets[s]]; 2329 2330 PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote)); 2331 PetscCall(PetscSFCreate(comm, &tmp_sf)); 2332 /* create helper SF with contiguous leaves */ 2333 PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode*) iremote, PETSC_COPY_VALUES)); 2334 PetscCall(PetscSFSetUp(tmp_sf)); 2335 PetscCall(PetscMalloc1(nr, &tmp_rootdata)); 2336 for (i = 0; i < nr; i++) { 2337 tmp_rootdata[i].index = i + rootOffsets[s]; 2338 tmp_rootdata[i].rank = (PetscInt) rank; 2339 } 2340 PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2341 PetscCall(PetscSFBcastEnd( tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE)); 2342 PetscCall(PetscSFDestroy(&tmp_sf)); 2343 PetscCall(PetscFree(tmp_rootdata)); 2344 } 2345 2346 /* Build the new SF */ 2347 PetscCall(PetscSFCreate(comm, newsf)); 2348 PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER)); 2349 PetscCall(PetscSFSetUp(*newsf)); 2350 PetscCall(PetscFree(rootOffsets)); 2351 PetscCall(PetscFree(leafArrayOffsets)); 2352 PetscFunctionReturn(0); 2353 } 2354