#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/hashseti.h>
#include <petsc/private/viewerimpl.h>
#include <petscctable.h>

#if defined(PETSC_HAVE_CUDA)
#include <cuda_runtime.h>
#endif

#if defined(PETSC_HAVE_HIP)
#include <hip/hip_runtime.h>
#endif

/* PetscSFCheckGraphSet(sf,arg): debug-only guard that raises an error if the
   star forest's graph has not been set yet.  For the clang static analyzer it
   is declared as a real function so the analyzer can model it; in optimized
   (non-debug) builds it compiles away to an empty statement. */
#if defined(PETSC_CLANG_STATIC_ANALYZER)
void PetscSFCheckGraphSet(PetscSF,int);
#else
#if defined(PETSC_USE_DEBUG)
# define PetscSFCheckGraphSet(sf,arg) do { \
    if (PetscUnlikely(!(sf)->graphset)) \
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
# define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif
#endif

/* Printable names for PetscSFDuplicateOption; trailing entries follow the
   PETSc enum-registration convention: enum type name, value prefix, NULL sentinel. */
const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};

/*@
   PetscSFCreate - create a star forest communication context

   Collective

   Input Parameter:
.  comm - communicator on which the star forest will operate

   Output Parameter:
.  sf - new star forest context

   Options Database Keys:
+  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
.  -sf_type window    -Use MPI-3 one-sided window for communication
-  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication

   Level: intermediate

   Notes:
   When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
   MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
   SFs are optimized and they have better performance than general SFs.

.seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
{
  PetscSF b;

  PetscFunctionBegin;
  PetscValidPointer(sf,2);
  PetscCall(PetscSFInitializePackage());

  PetscCall(PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView));

  /* -1 marks "graph not set yet"; min/max leaf start inverted (MAX/MIN) so the
     first real leaf range computed in PetscSFSetGraph() replaces them. */
  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_MAX_INT;
  b->maxleaf   = PETSC_MIN_INT;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;
#if defined(PETSC_HAVE_DEVICE)
  b->use_gpu_aware_mpi    = use_gpu_aware_mpi; /* global default set at PetscInitialize time */
  b->use_stream_aware_mpi = PETSC_FALSE;
  b->unknown_input_stream= PETSC_FALSE;
#if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
  b->backend = PETSCSF_BACKEND_KOKKOS;
#elif defined(PETSC_HAVE_CUDA)
  b->backend = PETSCSF_BACKEND_CUDA;
#elif defined(PETSC_HAVE_HIP)
  b->backend = PETSCSF_BACKEND_HIP;
#endif

#if defined(PETSC_HAVE_NVSHMEM)
  b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
  b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
  PetscCall(PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL));
  PetscCall(PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL));
#endif
#endif
  /* vscat fields are used when the SF backs a VecScatter; -1 means "unset" */
  b->vscat.from_n = -1;
  b->vscat.to_n   = -1;
  b->vscat.unit   = MPIU_SCALAR;
  *sf = b;
  PetscFunctionReturn(0);
}

/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Parameter:
.
  sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the type-specific implementation release its own data first */
  if (sf->ops->Reset) PetscCall((*sf->ops->Reset)(sf));
  /* Return graph-related fields to their PetscSFCreate() defaults */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  PetscCall(PetscFree(sf->mine_alloc));
  PetscCall(PetscFree(sf->remote_alloc));
  sf->nranks = -1;
  PetscCall(PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote));
  sf->degreeknown = PETSC_FALSE;
  PetscCall(PetscFree(sf->degree));
  if (sf->ingroup  != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
  if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
  /* Break the back-pointer cycle between sf and its multi-SF before destroying it */
  if (sf->multi) sf->multi->multi = NULL;
  PetscCall(PetscSFDestroy(&sf->multi));
  PetscCall(PetscLayoutDestroy(&sf->map));

#if defined(PETSC_HAVE_DEVICE)
  for (PetscInt i=0; i<2; i++) PetscCall(PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]));
#endif

  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFSetType - Set the PetscSF communication implementation

   Collective on PetscSF

   Input Parameters:
+  sf - the PetscSF context
-  type - a known method

   Options Database Key:
.
  -sf_type <type> - Sets the method; use -help for a list
   of available methods (for instance, window, basic, neighbor)

   Notes:
   See "include/petscsf.h" for available methods (for instance)
+    PETSCSFWINDOW - MPI-2/3 one-sided
-    PETSCSFBASIC - basic implementation using MPI-1 two-sided

   Level: intermediate

.seealso: PetscSFType, PetscSFCreate()
@*/
PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
{
  PetscBool match;
  PetscErrorCode (*r)(PetscSF);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidCharPointer(type,2);

  /* No-op if the SF already has the requested type */
  PetscCall(PetscObjectTypeCompare((PetscObject)sf,type,&match));
  if (match) PetscFunctionReturn(0);

  PetscCall(PetscFunctionListFind(PetscSFList,type,&r));
  PetscCheck(r,PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
  /* Destroy the previous PetscSF implementation context */
  if (sf->ops->Destroy) PetscCall((*(sf)->ops->Destroy)(sf));
  /* Clear the old ops table so the constructor r() starts from a clean slate */
  PetscCall(PetscMemzero(sf->ops,sizeof(*sf->ops)));
  PetscCall(PetscObjectChangeTypeName((PetscObject)sf,type));
  PetscCall((*r)(sf));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetType - Get the PetscSF communication implementation

   Not Collective

   Input Parameter:
.  sf - the PetscSF context

   Output Parameter:
.  type - the PetscSF type name

   Level: intermediate

.seealso: PetscSFSetType(), PetscSFCreate()
@*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
  PetscValidPointer(type,2);
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFDestroy - destroy star forest

   Collective

   Input Parameter:
.
  sf - address of star forest

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFReset()
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Only the last reference actually tears the object down */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  PetscCall(PetscSFReset(*sf));
  if ((*sf)->ops->Destroy) PetscCall((*(*sf)->ops->Destroy)(*sf));
  PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
  /* A bs>1 unit was created by MPI_Type_dup/contiguous and must be freed;
     bs==1 means unit is the builtin MPIU_SCALAR which must not be freed */
  if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
  PetscCall(PetscHeaderDestroy(sf));
  PetscFunctionReturn(0);
}

/* Debug-build validation of the graph: every leaf must point at a valid
   (rank,index) pair and have a non-negative local location.  No-op when the
   graph is unset or in optimized builds. */
static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
{
  PetscInt           i, nleaves;
  PetscMPIInt        size;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;

  PetscFunctionBegin;
  if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
  PetscCall(PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size));
  for (i = 0; i < nleaves; i++) {
    const PetscInt rank   = iremote[i].rank;
    const PetscInt remote = iremote[i].index;
    const PetscInt leaf   = ilocal ? ilocal[i] : i; /* NULL ilocal means contiguous leaves */
    PetscCheckFalse(rank < 0 || rank >= size,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)",rank,i,size);
    PetscCheck(remote >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0",remote,i);
    PetscCheck(leaf >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0",leaf,i);
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetUp - set up communication structures

   Collective

   Input Parameter:
.
sf - star forest communication object 266 267 Level: beginner 268 269 .seealso: PetscSFSetFromOptions(), PetscSFSetType() 270 @*/ 271 PetscErrorCode PetscSFSetUp(PetscSF sf) 272 { 273 PetscFunctionBegin; 274 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 275 PetscSFCheckGraphSet(sf,1); 276 if (sf->setupcalled) PetscFunctionReturn(0); 277 PetscCall(PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0)); 278 PetscCall(PetscSFCheckGraphValid_Private(sf)); 279 if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf,PETSCSFBASIC)); /* Zero all sf->ops */ 280 if (sf->ops->SetUp) PetscCall((*sf->ops->SetUp)(sf)); 281 #if defined(PETSC_HAVE_CUDA) 282 if (sf->backend == PETSCSF_BACKEND_CUDA) { 283 sf->ops->Malloc = PetscSFMalloc_CUDA; 284 sf->ops->Free = PetscSFFree_CUDA; 285 } 286 #endif 287 #if defined(PETSC_HAVE_HIP) 288 if (sf->backend == PETSCSF_BACKEND_HIP) { 289 sf->ops->Malloc = PetscSFMalloc_HIP; 290 sf->ops->Free = PetscSFFree_HIP; 291 } 292 #endif 293 294 # 295 #if defined(PETSC_HAVE_KOKKOS) 296 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 297 sf->ops->Malloc = PetscSFMalloc_Kokkos; 298 sf->ops->Free = PetscSFFree_Kokkos; 299 } 300 #endif 301 PetscCall(PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0)); 302 sf->setupcalled = PETSC_TRUE; 303 PetscFunctionReturn(0); 304 } 305 306 /*@ 307 PetscSFSetFromOptions - set PetscSF options using the options database 308 309 Logically Collective 310 311 Input Parameter: 312 . sf - star forest 313 314 Options Database Keys: 315 + -sf_type - implementation type, see PetscSFSetType() 316 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 317 . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also 318 use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true). 319 If true, this option only works with -use_gpu_aware_mpi 1. 320 . 
  -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
                               If true, this option only works with -use_gpu_aware_mpi 1.

-  -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos.
                              On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
                              the only available is kokkos.

   Level: intermediate
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType deft;
  char        type[256];
  PetscBool   flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscObjectOptionsBegin((PetscObject)sf);
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  PetscCall(PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg));
  /* Always (re)set the type: this also installs the default type if none was given */
  PetscCall(PetscSFSetType(sf,flg ? type : deft));
  PetscCall(PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL));
#if defined(PETSC_HAVE_DEVICE)
  {
    char      backendstr[32] = {0};
    PetscBool isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
    /* Change the defaults set in PetscSFCreate() with command line options */
    PetscCall(PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL));
    PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL));
    PetscCall(PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set));
    /* Case-insensitive match of the requested backend name */
    PetscCall(PetscStrcasecmp("cuda",backendstr,&isCuda));
    PetscCall(PetscStrcasecmp("kokkos",backendstr,&isKokkos));
    PetscCall(PetscStrcasecmp("hip",backendstr,&isHip));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
    if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
    else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
    else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
    else PetscCheck(!set,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
#elif defined(PETSC_HAVE_KOKKOS)
    /* Kokkos-only build: the only acceptable explicit choice is "kokkos" */
    PetscCheck(!set || isKokkos,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr);
#endif
  }
#endif
  if (sf->ops->SetFromOptions) PetscCall((*sf->ops->SetFromOptions)(PetscOptionsObject,sf));
  PetscOptionsEnd();
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

   Logically Collective

   Input Parameters:
+  sf - star forest
-  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

   Level: advanced

.seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  /* The multi-SF bakes in the ordering, so it must not exist yet */
  PetscCheck(!sf->multi,PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Parameters:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
during setup in debug mode)
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
during setup in debug mode)
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
   Leaf indices in ilocal must be unique, otherwise an error occurs.

   Input arrays ilocal and iremote follow the PetscCopyMode semantics.
   In particular, if localmode/remotemode is PETSC_OWN_POINTER or PETSC_USE_POINTER,
   PETSc might modify the respective array;
   if PETSC_USE_POINTER, the user must delete the array after PetscSFDestroy().
   Only if PETSC_COPY_VALUES is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).

   Fortran Notes:
   In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode.

   Developer Notes:
   We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
   This also allows to compare leaf sets of two SFs easily.

.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,PetscInt *ilocal,PetscCopyMode localmode,PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscBool unique, contiguous;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  PetscCheck(nroots >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %" PetscInt_FMT ", cannot be negative",nroots);
  PetscCheck(nleaves >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %" PetscInt_FMT ", cannot be negative",nleaves);
  PetscCheck(localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong localmode %d",localmode);
  PetscCheck(remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong remotemode %d",remotemode);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    PetscCall(PetscSFReset(sf));
  }

  PetscCall(PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0));

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  /* With PETSC_COPY_VALUES, take private copies so the caller's arrays are
     never touched by the in-place sort below */
  if (localmode == PETSC_COPY_VALUES && ilocal) {
    PetscInt *tlocal = NULL;

    PetscCall(PetscMalloc1(nleaves,&tlocal));
    PetscCall(PetscArraycpy(tlocal,ilocal,nleaves));
    ilocal = tlocal;
  }
  if (remotemode == PETSC_COPY_VALUES) {
    PetscSFNode *tremote = NULL;

    PetscCall(PetscMalloc1(nleaves,&tremote));
    PetscCall(PetscArraycpy(tremote,iremote,nleaves));
    iremote = tremote;
  }

  if (nleaves && ilocal) {
    PetscSFNode work;

    /* Sort leaves (dragging iremote along) to find duplicates, min/max leaf,
       and whether the leaf set is contiguous starting at 0 */
    PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
    PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
    unique = PetscNot(unique); /* CheckDups returns "has duplicates"; invert to "unique" */
    PetscCheck(sf->allow_multi_leaves || unique,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Input ilocal has duplicate entries which is not allowed for this PetscSF");
    sf->minleaf = ilocal[0];
    sf->maxleaf = ilocal[nleaves-1];
    contiguous = (PetscBool) (unique && ilocal[0] == 0 && ilocal[nleaves-1] == nleaves-1);
  } else {
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
    unique      = PETSC_TRUE;
    contiguous  = PETSC_TRUE;
  }

  /* A contiguous leaf set is stored as ilocal == NULL */
  if (contiguous) {
    if (localmode == PETSC_USE_POINTER) {
      ilocal = NULL;
    } else {
      PetscCall(PetscFree(ilocal));
    }
  }
  sf->mine = ilocal;
  /* mine_alloc/remote_alloc track ownership: NULL when the caller keeps it */
  if (localmode == PETSC_USE_POINTER) {
    sf->mine_alloc = NULL;
  } else {
    sf->mine_alloc = ilocal;
  }
  sf->remote = iremote;
  if (remotemode == PETSC_USE_POINTER) {
    sf->remote_alloc = NULL;
  } else {
    sf->remote_alloc = iremote;
  }
  PetscCall(PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0));
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

   Collective

   Input Parameters:
+  sf - The PetscSF
.
  map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
-  pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

   Notes:
   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
   n and N are local and global sizes of x respectively.

   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
   sequential vectors y on all ranks.

   With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
   sequential vector y on rank 0.

   In above cases, entries of x are roots and entries of y are leaves.

   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
   creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
   of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
   not mean one can not send multiple items. One just needs to create a new MPI datatype for the multiple data
   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

   In this case, roots and leaves are symmetric.

   Level: intermediate
@*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm    comm;
  PetscInt    n,N,res[2];
  PetscMPIInt rank,size;
  PetscSFType type;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map,2);
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL,comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %d",pattern);
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* Build the implied layout: "size" roots per rank, size*size globally.
       The (PetscInt) cast avoids PetscMPIInt overflow in the product. */
    PetscCall(PetscLayoutCreate(comm,&sf->map));
    PetscCall(PetscLayoutSetLocalSize(sf->map,size));
    PetscCall(PetscLayoutSetSize(sf->map,((PetscInt)size)*size));
    PetscCall(PetscLayoutSetUp(sf->map));
  } else {
    PetscCall(PetscLayoutGetLocalSize(map,&n));
    PetscCall(PetscLayoutGetSize(map,&N));
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm));
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    PetscCall(PetscLayoutReference(map,&sf->map));
  }
  PetscCall(PetscSFSetType(sf,type));

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots  = n;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves in a gather */
    sf->nleaves = rank ? 0 : N;
    sf->nroots  = n;
    sf->nranks  = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves = size;
    sf->nroots  = size;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

   Collective

   Input Parameter:
.  sf - star forest to invert

   Output Parameter:
.  isf - inverse of sf

   Level: advanced

   Notes:
   All roots must have degree 1.

   The local space may be a permutation, but cannot be sparse.

.seealso: PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscMPIInt     rank;
  PetscInt        i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL));
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
  PetscCall(PetscMalloc2(nroots,&roots,maxlocal,&leaves));
  /* Each leaf advertises its own (rank,index) ... */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* ... and roots start as "unreferenced" sentinels */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  /* Push each leaf's identity onto its root; afterwards roots[] holds, for
     every referenced root, the (rank,index) of the unique leaf pointing at it */
  PetscCall(PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE));

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL; /* dense: the inverse has contiguous leaves */
  else {                                 /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    PetscCall(PetscMalloc1(count,&newilocal));
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  /* Inverse SF: old roots become leaves and vice versa */
  PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf));
  PetscCall(PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES));
  PetscCall(PetscFree2(roots,leaves));
  PetscFunctionReturn(0);
}

/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Parameters:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Parameter:
.  newsf - new communication object

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType  type;
  MPI_Datatype dtype=MPIU_SCALAR;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf));
  PetscCall(PetscSFGetType(sf,&type));
  if (type) PetscCall(PetscSFSetType(*newsf,type));
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      /* PETSC_COPY_VALUES guarantees the source SF's arrays are untouched */
      PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote));
      PetscCall(PetscSFSetGraph(*newsf,nroots,nleaves,(PetscInt*)ilocal,PETSC_COPY_VALUES,(PetscSFNode*)iremote,PETSC_COPY_VALUES));
    } else {
      PetscCall(PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern));
    }
  }
  /* Since oldtype is committed, so is newtype, according to MPI */
  if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit,&dtype));
  (*newsf)->vscat.bs     = sf->vscat.bs;
  (*newsf)->vscat.unit   = dtype;
  (*newsf)->vscat.to_n   = sf->vscat.to_n;
  (*newsf)->vscat.from_n = sf->vscat.from_n;
  /* Do not copy lsf. Build it on demand since it is rarely used */

#if defined(PETSC_HAVE_DEVICE)
  (*newsf)->backend              = sf->backend;
  (*newsf)->unknown_input_stream= sf->unknown_input_stream;
  (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  if (sf->ops->Duplicate) PetscCall((*sf->ops->Duplicate)(sf,opt,*newsf));
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGraph - Get the graph specifying a parallel star forest

   Not Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
-  iremote - remote locations of root vertices for each leaf on the current process

   Notes:
   We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet

   The returned ilocal/iremote might contain values in different order than the input ones in PetscSFSetGraph(),
   see its manpage for details.

   Fortran Notes:
   The returned iremote array is a copy and must be deallocated after use. Consequently, if you
   want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.

   To check for a NULL ilocal use
$      if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* A type may expose its own view of the graph; otherwise return the stored one */
  if (sf->ops->GetGraph) {
    PetscCall((sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote));
  } else {
    if (nroots)  *nroots  = sf->nroots;
    if (nleaves) *nleaves = sf->nleaves;
    if (ilocal)  *ilocal  = sf->mine;
    if (iremote) *iremote = sf->remote;
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetLeafRange - Get the active leaf ranges

   Not Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
-  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.

   Level: developer

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFViewFromOptions - View from Options

   Collective on PetscSF

   Input Parameters:
+  A - the star forest
.
  obj - Optional object
-  name - command line option

   Level: intermediate
.seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
  PetscCall(PetscObjectViewFromOptions((PetscObject)A,obj,name));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFView - view a star forest

   Collective

   Input Parameters:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer));
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Setup is needed so rank/offset tables exist for the detailed view below */
  if (sf->graphset) PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer));
    PetscCall(PetscViewerASCIIPushTab(viewer));
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        PetscCall(PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n"));
        PetscCall(PetscViewerASCIIPopTab(viewer));
        PetscFunctionReturn(0);
      }
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n",rank,sf->nroots,sf->nleaves,sf->nranks));
      for (i=0; i<sf->nleaves; i++) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index));
      }
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerGetFormat(viewer,&format));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        /* Sort a copy of the ranks (with a permutation) so output is ordered by rank */
        PetscCall(PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm));
        PetscCall(PetscArraycpy(tmpranks,sf->ranks,sf->nranks));
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        PetscCall(PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank));
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %" PetscInt_FMT " edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]));
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n",rank,sf->rmine[j],sf->rremote[j]));
          }
        }
        PetscCall(PetscFree2(tmpranks,perm));
      }
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
    }
    PetscCall(PetscViewerASCIIPopTab(viewer));
  }
  if (sf->ops->View) PetscCall((*sf->ops->View)(sf,viewer));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
.  roffset - offset in rmine/rremote for each rank (length nranks+1)
.
rmine - concatenated array holding local indices referencing each remote rank 908 - rremote - concatenated array holding remote indices referenced for each remote rank 909 910 Level: developer 911 912 .seealso: PetscSFGetLeafRanks() 913 @*/ 914 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote) 915 { 916 PetscFunctionBegin; 917 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 918 PetscCheck(sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 919 if (sf->ops->GetRootRanks) { 920 PetscCall((sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote)); 921 } else { 922 /* The generic implementation */ 923 if (nranks) *nranks = sf->nranks; 924 if (ranks) *ranks = sf->ranks; 925 if (roffset) *roffset = sf->roffset; 926 if (rmine) *rmine = sf->rmine; 927 if (rremote) *rremote = sf->rremote; 928 } 929 PetscFunctionReturn(0); 930 } 931 932 /*@C 933 PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 934 935 Not Collective 936 937 Input Parameter: 938 . sf - star forest 939 940 Output Parameters: 941 + niranks - number of leaf ranks referencing roots on this process 942 . iranks - array of ranks 943 . 
ioffset - offset in irootloc for each rank (length niranks+1) 944 - irootloc - concatenated array holding local indices of roots referenced by each leaf rank 945 946 Level: developer 947 948 .seealso: PetscSFGetRootRanks() 949 @*/ 950 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 951 { 952 PetscFunctionBegin; 953 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 954 PetscCheck(sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 955 if (sf->ops->GetLeafRanks) { 956 PetscCall((sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc)); 957 } else { 958 PetscSFType type; 959 PetscCall(PetscSFGetType(sf,&type)); 960 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 961 } 962 PetscFunctionReturn(0); 963 } 964 965 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) { 966 PetscInt i; 967 for (i=0; i<n; i++) { 968 if (needle == list[i]) return PETSC_TRUE; 969 } 970 return PETSC_FALSE; 971 } 972 973 /*@C 974 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations. 
975 976 Collective 977 978 Input Parameters: 979 + sf - PetscSF to set up; PetscSFSetGraph() must have been called 980 - dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange) 981 982 Level: developer 983 984 .seealso: PetscSFGetRootRanks() 985 @*/ 986 PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup) 987 { 988 PetscTable table; 989 PetscTablePosition pos; 990 PetscMPIInt size,groupsize,*groupranks; 991 PetscInt *rcount,*ranks; 992 PetscInt i, irank = -1,orank = -1; 993 994 PetscFunctionBegin; 995 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 996 PetscSFCheckGraphSet(sf,1); 997 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size)); 998 PetscCall(PetscTableCreate(10,size,&table)); 999 for (i=0; i<sf->nleaves; i++) { 1000 /* Log 1-based rank */ 1001 PetscCall(PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES)); 1002 } 1003 PetscCall(PetscTableGetCount(table,&sf->nranks)); 1004 PetscCall(PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote)); 1005 PetscCall(PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks)); 1006 PetscCall(PetscTableGetHeadPosition(table,&pos)); 1007 for (i=0; i<sf->nranks; i++) { 1008 PetscCall(PetscTableGetNext(table,&pos,&ranks[i],&rcount[i])); 1009 ranks[i]--; /* Convert back to 0-based */ 1010 } 1011 PetscCall(PetscTableDestroy(&table)); 1012 1013 /* We expect that dgroup is reliably "small" while nranks could be large */ 1014 { 1015 MPI_Group group = MPI_GROUP_NULL; 1016 PetscMPIInt *dgroupranks; 1017 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group)); 1018 PetscCallMPI(MPI_Group_size(dgroup,&groupsize)); 1019 PetscCall(PetscMalloc1(groupsize,&dgroupranks)); 1020 PetscCall(PetscMalloc1(groupsize,&groupranks)); 1021 for (i=0; i<groupsize; i++) dgroupranks[i] = i; 1022 if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks)); 1023 
PetscCallMPI(MPI_Group_free(&group)); 1024 PetscCall(PetscFree(dgroupranks)); 1025 } 1026 1027 /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */ 1028 for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) { 1029 for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */ 1030 if (InList(ranks[i],groupsize,groupranks)) break; 1031 } 1032 for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */ 1033 if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break; 1034 } 1035 if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */ 1036 PetscInt tmprank,tmpcount; 1037 1038 tmprank = ranks[i]; 1039 tmpcount = rcount[i]; 1040 ranks[i] = ranks[sf->ndranks]; 1041 rcount[i] = rcount[sf->ndranks]; 1042 ranks[sf->ndranks] = tmprank; 1043 rcount[sf->ndranks] = tmpcount; 1044 sf->ndranks++; 1045 } 1046 } 1047 PetscCall(PetscFree(groupranks)); 1048 PetscCall(PetscSortIntWithArray(sf->ndranks,ranks,rcount)); 1049 PetscCall(PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks)); 1050 sf->roffset[0] = 0; 1051 for (i=0; i<sf->nranks; i++) { 1052 PetscCall(PetscMPIIntCast(ranks[i],sf->ranks+i)); 1053 sf->roffset[i+1] = sf->roffset[i] + rcount[i]; 1054 rcount[i] = 0; 1055 } 1056 for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) { 1057 /* short circuit */ 1058 if (orank != sf->remote[i].rank) { 1059 /* Search for index of iremote[i].rank in sf->ranks */ 1060 PetscCall(PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank)); 1061 if (irank < 0) { 1062 PetscCall(PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank)); 1063 if (irank >= 0) irank += sf->ndranks; 1064 } 1065 orank = sf->remote[i].rank; 1066 } 1067 PetscCheck(irank >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %" PetscInt_FMT " in array",sf->remote[i].rank); 1068 sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? 
sf->mine[i] : i; 1069 sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index; 1070 rcount[irank]++; 1071 } 1072 PetscCall(PetscFree2(rcount,ranks)); 1073 PetscFunctionReturn(0); 1074 } 1075 1076 /*@C 1077 PetscSFGetGroups - gets incoming and outgoing process groups 1078 1079 Collective 1080 1081 Input Parameter: 1082 . sf - star forest 1083 1084 Output Parameters: 1085 + incoming - group of origin processes for incoming edges (leaves that reference my roots) 1086 - outgoing - group of destination processes for outgoing edges (roots that I reference) 1087 1088 Level: developer 1089 1090 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow() 1091 @*/ 1092 PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing) 1093 { 1094 MPI_Group group = MPI_GROUP_NULL; 1095 1096 PetscFunctionBegin; 1097 PetscCheck(sf->nranks >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups"); 1098 if (sf->ingroup == MPI_GROUP_NULL) { 1099 PetscInt i; 1100 const PetscInt *indegree; 1101 PetscMPIInt rank,*outranks,*inranks; 1102 PetscSFNode *remote; 1103 PetscSF bgcount; 1104 1105 /* Compute the number of incoming ranks */ 1106 PetscCall(PetscMalloc1(sf->nranks,&remote)); 1107 for (i=0; i<sf->nranks; i++) { 1108 remote[i].rank = sf->ranks[i]; 1109 remote[i].index = 0; 1110 } 1111 PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount)); 1112 PetscCall(PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER)); 1113 PetscCall(PetscSFComputeDegreeBegin(bgcount,&indegree)); 1114 PetscCall(PetscSFComputeDegreeEnd(bgcount,&indegree)); 1115 /* Enumerate the incoming ranks */ 1116 PetscCall(PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks)); 1117 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank)); 1118 for (i=0; i<sf->nranks; i++) outranks[i] = rank; 1119 PetscCall(PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks)); 1120 
PetscCall(PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks)); 1121 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group)); 1122 PetscCallMPI(MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup)); 1123 PetscCallMPI(MPI_Group_free(&group)); 1124 PetscCall(PetscFree2(inranks,outranks)); 1125 PetscCall(PetscSFDestroy(&bgcount)); 1126 } 1127 *incoming = sf->ingroup; 1128 1129 if (sf->outgroup == MPI_GROUP_NULL) { 1130 PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group)); 1131 PetscCallMPI(MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup)); 1132 PetscCallMPI(MPI_Group_free(&group)); 1133 } 1134 *outgoing = sf->outgroup; 1135 PetscFunctionReturn(0); 1136 } 1137 1138 /*@ 1139 PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters 1140 1141 Collective 1142 1143 Input Parameter: 1144 . sf - star forest that may contain roots with 0 or with more than 1 vertex 1145 1146 Output Parameter: 1147 . multi - star forest with split roots, such that each root has degree exactly 1 1148 1149 Level: developer 1150 1151 Notes: 1152 1153 In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi 1154 directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming 1155 edge, it is a candidate for future optimization that might involve its removal. 1156 1157 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering() 1158 @*/ 1159 PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi) 1160 { 1161 PetscFunctionBegin; 1162 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1163 PetscValidPointer(multi,2); 1164 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? 
*/ 1165 PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi)); 1166 *multi = sf->multi; 1167 sf->multi->multi = sf->multi; 1168 PetscFunctionReturn(0); 1169 } 1170 if (!sf->multi) { 1171 const PetscInt *indegree; 1172 PetscInt i,*inoffset,*outones,*outoffset,maxlocal; 1173 PetscSFNode *remote; 1174 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 1175 PetscCall(PetscSFComputeDegreeBegin(sf,&indegree)); 1176 PetscCall(PetscSFComputeDegreeEnd(sf,&indegree)); 1177 PetscCall(PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset)); 1178 inoffset[0] = 0; 1179 for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i]; 1180 for (i=0; i<maxlocal; i++) outones[i] = 1; 1181 PetscCall(PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM)); 1182 PetscCall(PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM)); 1183 for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1184 if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */ 1185 for (i=0; i<sf->nroots; i++) { 1186 PetscCheckFalse(inoffset[i] + indegree[i] != inoffset[i+1],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp"); 1187 } 1188 } 1189 PetscCall(PetscMalloc1(sf->nleaves,&remote)); 1190 for (i=0; i<sf->nleaves; i++) { 1191 remote[i].rank = sf->remote[i].rank; 1192 remote[i].index = outoffset[sf->mine ? 
sf->mine[i] : i]; 1193 } 1194 PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi)); 1195 sf->multi->multi = sf->multi; 1196 PetscCall(PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER)); 1197 if (sf->rankorder) { /* Sort the ranks */ 1198 PetscMPIInt rank; 1199 PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree; 1200 PetscSFNode *newremote; 1201 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank)); 1202 for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]); 1203 PetscCall(PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset)); 1204 for (i=0; i<maxlocal; i++) outranks[i] = rank; 1205 PetscCall(PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE)); 1206 PetscCall(PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE)); 1207 /* Sort the incoming ranks at each vertex, build the inverse map */ 1208 for (i=0; i<sf->nroots; i++) { 1209 PetscInt j; 1210 for (j=0; j<indegree[i]; j++) tmpoffset[j] = j; 1211 PetscCall(PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset)); 1212 for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1213 } 1214 PetscCall(PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE)); 1215 PetscCall(PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE)); 1216 PetscCall(PetscMalloc1(sf->nleaves,&newremote)); 1217 for (i=0; i<sf->nleaves; i++) { 1218 newremote[i].rank = sf->remote[i].rank; 1219 newremote[i].index = newoutoffset[sf->mine ? 
sf->mine[i] : i]; 1220 } 1221 PetscCall(PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER)); 1222 PetscCall(PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset)); 1223 } 1224 PetscCall(PetscFree3(inoffset,outones,outoffset)); 1225 } 1226 *multi = sf->multi; 1227 PetscFunctionReturn(0); 1228 } 1229 1230 /*@C 1231 PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices 1232 1233 Collective 1234 1235 Input Parameters: 1236 + sf - original star forest 1237 . nselected - number of selected roots on this process 1238 - selected - indices of the selected roots on this process 1239 1240 Output Parameter: 1241 . esf - new star forest 1242 1243 Level: advanced 1244 1245 Note: 1246 To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can 1247 be done by calling PetscSFGetGraph(). 1248 1249 .seealso: PetscSFSetGraph(), PetscSFGetGraph() 1250 @*/ 1251 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf) 1252 { 1253 PetscInt i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal; 1254 const PetscInt *ilocal; 1255 signed char *rootdata,*leafdata,*leafmem; 1256 const PetscSFNode *iremote; 1257 PetscSFNode *new_iremote; 1258 MPI_Comm comm; 1259 1260 PetscFunctionBegin; 1261 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1262 PetscSFCheckGraphSet(sf,1); 1263 if (nselected) PetscValidIntPointer(selected,3); 1264 PetscValidPointer(esf,4); 1265 1266 PetscCall(PetscSFSetUp(sf)); 1267 PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0)); 1268 PetscCall(PetscObjectGetComm((PetscObject)sf,&comm)); 1269 PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote)); 1270 1271 if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */ 1272 PetscBool dups; 1273 
PetscCall(PetscCheckDupsInt(nselected,selected,&dups)); 1274 PetscCheck(!dups,comm,PETSC_ERR_ARG_WRONG,"selected[] has dups"); 1275 for (i=0; i<nselected; i++) 1276 PetscCheckFalse(selected[i] < 0 || selected[i] >= nroots,comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")",selected[i],nroots); 1277 } 1278 1279 if (sf->ops->CreateEmbeddedRootSF) { 1280 PetscCall((*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf)); 1281 } else { 1282 /* A generic version of creating embedded sf */ 1283 PetscCall(PetscSFGetLeafRange(sf,&minleaf,&maxleaf)); 1284 maxlocal = maxleaf - minleaf + 1; 1285 PetscCall(PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem)); 1286 leafdata = leafmem - minleaf; 1287 /* Tag selected roots and bcast to leaves */ 1288 for (i=0; i<nselected; i++) rootdata[selected[i]] = 1; 1289 PetscCall(PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE)); 1290 PetscCall(PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE)); 1291 1292 /* Build esf with leaves that are still connected */ 1293 esf_nleaves = 0; 1294 for (i=0; i<nleaves; i++) { 1295 j = ilocal ? ilocal[i] : i; 1296 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1297 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1298 */ 1299 esf_nleaves += (leafdata[j] ? 1 : 0); 1300 } 1301 PetscCall(PetscMalloc1(esf_nleaves,&new_ilocal)); 1302 PetscCall(PetscMalloc1(esf_nleaves,&new_iremote)); 1303 for (i=n=0; i<nleaves; i++) { 1304 j = ilocal ? 
ilocal[i] : i; 1305 if (leafdata[j]) { 1306 new_ilocal[n] = j; 1307 new_iremote[n].rank = iremote[i].rank; 1308 new_iremote[n].index = iremote[i].index; 1309 ++n; 1310 } 1311 } 1312 PetscCall(PetscSFCreate(comm,esf)); 1313 PetscCall(PetscSFSetFromOptions(*esf)); 1314 PetscCall(PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER)); 1315 PetscCall(PetscFree2(rootdata,leafmem)); 1316 } 1317 PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0)); 1318 PetscFunctionReturn(0); 1319 } 1320 1321 /*@C 1322 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1323 1324 Collective 1325 1326 Input Parameters: 1327 + sf - original star forest 1328 . nselected - number of selected leaves on this process 1329 - selected - indices of the selected leaves on this process 1330 1331 Output Parameter: 1332 . newsf - new star forest 1333 1334 Level: advanced 1335 1336 .seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph() 1337 @*/ 1338 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1339 { 1340 const PetscSFNode *iremote; 1341 PetscSFNode *new_iremote; 1342 const PetscInt *ilocal; 1343 PetscInt i,nroots,*leaves,*new_ilocal; 1344 MPI_Comm comm; 1345 1346 PetscFunctionBegin; 1347 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1348 PetscSFCheckGraphSet(sf,1); 1349 if (nselected) PetscValidIntPointer(selected,3); 1350 PetscValidPointer(newsf,4); 1351 1352 /* Uniq selected[] and put results in leaves[] */ 1353 PetscCall(PetscObjectGetComm((PetscObject)sf,&comm)); 1354 PetscCall(PetscMalloc1(nselected,&leaves)); 1355 PetscCall(PetscArraycpy(leaves,selected,nselected)); 1356 PetscCall(PetscSortedRemoveDupsInt(&nselected,leaves)); 1357 PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves),comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in 
[0,%" PetscInt_FMT ")",leaves[0],leaves[nselected-1],sf->nleaves); 1358 1359 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1360 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) { 1361 PetscCall((*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf)); 1362 } else { 1363 PetscCall(PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote)); 1364 PetscCall(PetscMalloc1(nselected,&new_ilocal)); 1365 PetscCall(PetscMalloc1(nselected,&new_iremote)); 1366 for (i=0; i<nselected; ++i) { 1367 const PetscInt l = leaves[i]; 1368 new_ilocal[i] = ilocal ? ilocal[l] : l; 1369 new_iremote[i].rank = iremote[l].rank; 1370 new_iremote[i].index = iremote[l].index; 1371 } 1372 PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf)); 1373 PetscCall(PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER)); 1374 } 1375 PetscCall(PetscFree(leaves)); 1376 PetscFunctionReturn(0); 1377 } 1378 1379 /*@C 1380 PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd() 1381 1382 Collective on PetscSF 1383 1384 Input Parameters: 1385 + sf - star forest on which to communicate 1386 . unit - data type associated with each node 1387 . rootdata - buffer to broadcast 1388 - op - operation to use for reduction 1389 1390 Output Parameter: 1391 . leafdata - buffer to be reduced with values from each leaf's respective root 1392 1393 Level: intermediate 1394 1395 Notes: 1396 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1397 are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1398 use PetscSFBcastWithMemTypeBegin() instead. 
1399 .seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin() 1400 @*/ 1401 PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1402 { 1403 PetscMemType rootmtype,leafmtype; 1404 1405 PetscFunctionBegin; 1406 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1407 PetscCall(PetscSFSetUp(sf)); 1408 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0)); 1409 PetscCall(PetscGetMemType(rootdata,&rootmtype)); 1410 PetscCall(PetscGetMemType(leafdata,&leafmtype)); 1411 PetscCall((*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op)); 1412 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0)); 1413 PetscFunctionReturn(0); 1414 } 1415 1416 /*@C 1417 PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd() 1418 1419 Collective on PetscSF 1420 1421 Input Parameters: 1422 + sf - star forest on which to communicate 1423 . unit - data type associated with each node 1424 . rootmtype - memory type of rootdata 1425 . rootdata - buffer to broadcast 1426 . leafmtype - memory type of leafdata 1427 - op - operation to use for reduction 1428 1429 Output Parameter: 1430 . 
leafdata - buffer to be reduced with values from each leaf's respective root 1431 1432 Level: intermediate 1433 1434 .seealso: PetscSFBcastEnd(), PetscSFBcastBegin() 1435 @*/ 1436 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 1437 { 1438 PetscFunctionBegin; 1439 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1440 PetscCall(PetscSFSetUp(sf)); 1441 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0)); 1442 PetscCall((*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op)); 1443 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0)); 1444 PetscFunctionReturn(0); 1445 } 1446 1447 /*@C 1448 PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin() 1449 1450 Collective 1451 1452 Input Parameters: 1453 + sf - star forest 1454 . unit - data type 1455 . rootdata - buffer to broadcast 1456 - op - operation to use for reduction 1457 1458 Output Parameter: 1459 . leafdata - buffer to be reduced with values from each leaf's respective root 1460 1461 Level: intermediate 1462 1463 .seealso: PetscSFSetGraph(), PetscSFReduceEnd() 1464 @*/ 1465 PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1466 { 1467 PetscFunctionBegin; 1468 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1469 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0)); 1470 PetscCall((*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op)); 1471 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0)); 1472 PetscFunctionReturn(0); 1473 } 1474 1475 /*@C 1476 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 1477 1478 Collective 1479 1480 Input Parameters: 1481 + sf - star forest 1482 . unit - data type 1483 . 
leafdata - values to reduce 1484 - op - reduction operation 1485 1486 Output Parameter: 1487 . rootdata - result of reduction of values from all leaves of each root 1488 1489 Level: intermediate 1490 1491 Notes: 1492 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1493 are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1494 use PetscSFReduceWithMemTypeBegin() instead. 1495 1496 .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin() 1497 @*/ 1498 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1499 { 1500 PetscMemType rootmtype,leafmtype; 1501 1502 PetscFunctionBegin; 1503 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1504 PetscCall(PetscSFSetUp(sf)); 1505 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0)); 1506 PetscCall(PetscGetMemType(rootdata,&rootmtype)); 1507 PetscCall(PetscGetMemType(leafdata,&leafmtype)); 1508 PetscCall((sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op)); 1509 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0)); 1510 PetscFunctionReturn(0); 1511 } 1512 1513 /*@C 1514 PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd() 1515 1516 Collective 1517 1518 Input Parameters: 1519 + sf - star forest 1520 . unit - data type 1521 . leafmtype - memory type of leafdata 1522 . leafdata - values to reduce 1523 . rootmtype - memory type of rootdata 1524 - op - reduction operation 1525 1526 Output Parameter: 1527 . 
rootdata - result of reduction of values from all leaves of each root 1528 1529 Level: intermediate 1530 1531 .seealso: PetscSFBcastBegin(), PetscSFReduceBegin() 1532 @*/ 1533 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 1534 { 1535 PetscFunctionBegin; 1536 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1537 PetscCall(PetscSFSetUp(sf)); 1538 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0)); 1539 PetscCall((sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op)); 1540 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0)); 1541 PetscFunctionReturn(0); 1542 } 1543 1544 /*@C 1545 PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin() 1546 1547 Collective 1548 1549 Input Parameters: 1550 + sf - star forest 1551 . unit - data type 1552 . leafdata - values to reduce 1553 - op - reduction operation 1554 1555 Output Parameter: 1556 . rootdata - result of reduction of values from all leaves of each root 1557 1558 Level: intermediate 1559 1560 .seealso: PetscSFSetGraph(), PetscSFBcastEnd() 1561 @*/ 1562 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1563 { 1564 PetscFunctionBegin; 1565 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1566 if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0)); 1567 PetscCall((*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op)); 1568 if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0)); 1569 PetscFunctionReturn(0); 1570 } 1571 1572 /*@C 1573 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd() 1574 1575 Collective 1576 1577 Input Parameters: 1578 + sf - star forest 1579 . 
unit - data type 1580 . leafdata - leaf values to use in reduction 1581 - op - operation to use for reduction 1582 1583 Output Parameters: 1584 + rootdata - root values to be updated, input state is seen by first process to perform an update 1585 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1586 1587 Level: advanced 1588 1589 Note: 1590 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1591 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1592 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1593 integers. 1594 1595 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph() 1596 @*/ 1597 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1598 { 1599 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1600 1601 PetscFunctionBegin; 1602 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1603 PetscCall(PetscSFSetUp(sf)); 1604 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0)); 1605 PetscCall(PetscGetMemType(rootdata,&rootmtype)); 1606 PetscCall(PetscGetMemType(leafdata,&leafmtype)); 1607 PetscCall(PetscGetMemType(leafupdate,&leafupdatemtype)); 1608 PetscCheck(leafmtype == leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1609 PetscCall((*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op)); 1610 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0)); 1611 PetscFunctionReturn(0); 1612 } 1613 1614 /*@C 1615 PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with 
PetscSFFetchAndOpEnd() 1616 1617 Collective 1618 1619 Input Parameters: 1620 + sf - star forest 1621 . unit - data type 1622 . rootmtype - memory type of rootdata 1623 . leafmtype - memory type of leafdata 1624 . leafdata - leaf values to use in reduction 1625 . leafupdatemtype - memory type of leafupdate 1626 - op - operation to use for reduction 1627 1628 Output Parameters: 1629 + rootdata - root values to be updated, input state is seen by first process to perform an update 1630 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1631 1632 Level: advanced 1633 1634 Note: See PetscSFFetchAndOpBegin() for more details. 1635 1636 .seealso: PetscSFFetchAndOpBegin(),PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph() 1637 @*/ 1638 PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscMemType leafupdatemtype,void *leafupdate,MPI_Op op) 1639 { 1640 PetscFunctionBegin; 1641 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1642 PetscCall(PetscSFSetUp(sf)); 1643 PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0)); 1644 PetscCheck(leafmtype == leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1645 PetscCall((*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op)); 1646 PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0)); 1647 PetscFunctionReturn(0); 1648 } 1649 1650 /*@C 1651 PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value 1652 1653 Collective 1654 1655 Input Parameters: 1656 + sf - star forest 1657 . unit - data type 1658 . 
leafdata - leaf values to use in reduction
- op - operation to use for reduction

Output Parameters:
+ rootdata - root values to be updated, input state is seen by first process to perform an update
- leafupdate - state at each leaf's respective root immediately prior to my atomic update

Level: advanced

.seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0));
  /* Memory types were determined in the matching Begin call; just complete the communication */
  PetscCall((*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op));
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0));
  PetscFunctionReturn(0);
}

/*@C
PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

Collective

Input Parameter:
. sf - star forest

Output Parameter:
. degree - degree of each root vertex

Level: advanced

Notes:
The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1694 1695 .seealso: PetscSFGatherBegin() 1696 @*/ 1697 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree) 1698 { 1699 PetscFunctionBegin; 1700 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1701 PetscSFCheckGraphSet(sf,1); 1702 PetscValidPointer(degree,2); 1703 if (!sf->degreeknown) { 1704 PetscInt i, nroots = sf->nroots, maxlocal; 1705 PetscCheck(!sf->degree,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1706 maxlocal = sf->maxleaf-sf->minleaf+1; 1707 PetscCall(PetscMalloc1(nroots,&sf->degree)); 1708 PetscCall(PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1709 for (i=0; i<nroots; i++) sf->degree[i] = 0; 1710 for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1; 1711 PetscCall(PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM)); 1712 } 1713 *degree = NULL; 1714 PetscFunctionReturn(0); 1715 } 1716 1717 /*@C 1718 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin() 1719 1720 Collective 1721 1722 Input Parameter: 1723 . sf - star forest 1724 1725 Output Parameter: 1726 . degree - degree of each root vertex 1727 1728 Level: developer 1729 1730 Notes: 1731 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 

.seealso: PetscSFComputeDegreeBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscCheck(sf->degreetmp,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    PetscCall(PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM));
    PetscCall(PetscFree(sf->degreetmp));
    sf->degreeknown = PETSC_TRUE; /* cache the result; subsequent Begin/End pairs become no-ops */
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}

/*@C
PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
Each multi-root is assigned index of the corresponding original root.

Collective

Input Parameters:
+ sf - star forest
- degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

Output Parameters:
+ nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
- multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

Level: developer

Notes:
The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1769 1770 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF() 1771 @*/ 1772 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1773 { 1774 PetscSF msf; 1775 PetscInt i, j, k, nroots, nmroots; 1776 1777 PetscFunctionBegin; 1778 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1779 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL)); 1780 if (nroots) PetscValidIntPointer(degree,2); 1781 if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3); 1782 PetscValidPointer(multiRootsOrigNumbering,4); 1783 PetscCall(PetscSFGetMultiSF(sf,&msf)); 1784 PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL)); 1785 PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering)); 1786 for (i=0,j=0,k=0; i<nroots; i++) { 1787 if (!degree[i]) continue; 1788 for (j=0; j<degree[i]; j++,k++) { 1789 (*multiRootsOrigNumbering)[k] = i; 1790 } 1791 } 1792 PetscCheck(k == nmroots,PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail"); 1793 if (nMultiRoots) *nMultiRoots = nmroots; 1794 PetscFunctionReturn(0); 1795 } 1796 1797 /*@C 1798 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 1799 1800 Collective 1801 1802 Input Parameters: 1803 + sf - star forest 1804 . unit - data type 1805 - leafdata - leaf data to gather to roots 1806 1807 Output Parameter: 1808 . 
multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1809 1810 Level: intermediate 1811 1812 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1813 @*/ 1814 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1815 { 1816 PetscSF multi = NULL; 1817 1818 PetscFunctionBegin; 1819 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1820 PetscCall(PetscSFSetUp(sf)); 1821 PetscCall(PetscSFGetMultiSF(sf,&multi)); 1822 PetscCall(PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE)); 1823 PetscFunctionReturn(0); 1824 } 1825 1826 /*@C 1827 PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin() 1828 1829 Collective 1830 1831 Input Parameters: 1832 + sf - star forest 1833 . unit - data type 1834 - leafdata - leaf data to gather to roots 1835 1836 Output Parameter: 1837 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1838 1839 Level: intermediate 1840 1841 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1842 @*/ 1843 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1844 { 1845 PetscSF multi = NULL; 1846 1847 PetscFunctionBegin; 1848 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1849 PetscCall(PetscSFGetMultiSF(sf,&multi)); 1850 PetscCall(PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE)); 1851 PetscFunctionReturn(0); 1852 } 1853 1854 /*@C 1855 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd() 1856 1857 Collective 1858 1859 Input Parameters: 1860 + sf - star forest 1861 . unit - data type 1862 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1863 1864 Output Parameter: 1865 . 
leafdata - leaf data to be updated with personal data from each respective root

Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscSFSetUp(sf));
  /* A scatter is a broadcast over the multi-SF: each leaf receives its personal slot of the multi-root buffer */
  PetscCall(PetscSFGetMultiSF(sf,&multi));
  PetscCall(PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE));
  PetscFunctionReturn(0);
}

/*@C
PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

Collective

Input Parameters:
+ sf - star forest
. unit - data type
- multirootdata - root buffer to send to each leaf, one unit of data per leaf

Output Parameter:
. leafdata - leaf data to be updated with personal data from each respective root

Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscSFGetMultiSF(sf,&multi));
  PetscCall(PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE));
  PetscFunctionReturn(0);
}

/* Debug-only check that no local leaf index appears twice in the graph of sf.
   Compiles to a no-op in optimized builds. */
static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
{
  PetscInt        i, n, nleaves;
  const PetscInt *ilocal = NULL;
  PetscHSetI      seen;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) {
    PetscCall(PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL));
    PetscCall(PetscHSetICreate(&seen));
    for (i = 0; i < nleaves; i++) {
      const PetscInt leaf = ilocal ? ilocal[i] : i; /* ilocal==NULL means contiguous leaves [0,nleaves) */
      PetscCall(PetscHSetIAdd(seen,leaf));
    }
    /* if any leaf repeated, the set is smaller than the leaf count */
    PetscCall(PetscHSetIGetSize(seen,&n));
    PetscCheck(n == nleaves,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
    PetscCall(PetscHSetIDestroy(&seen));
  }
  PetscFunctionReturn(0);
}

/*@
PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

Output Parameters:
. sfBA - The composite SF

Level: developer

Notes:
Currently, the two SFs must be defined on congruent communicators and they must be true star
forests, i.e. the same leaf is not connected with different roots.

sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA));
  PetscCall(PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB));
  if (localPointsA) {
    /* Reorder sfA's remote points into sfB's root index space; rank/index -1 marks
       sfB roots that are not fed by any sfA leaf */
    PetscCall(PetscMalloc1(numRootsB,&reorderedRemotePointsA));
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue;
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  PetscCall(PetscSFGetLeafRange(sfB,&minleaf,&maxleaf));
  PetscCall(PetscMalloc1(maxleaf-minleaf+1,&leafdataB));
  /* Broadcast sfA's root identities through sfB so each sfB leaf learns which sfA root it chains to */
  PetscCall(PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE));
  PetscCall(PetscFree(reorderedRemotePointsA));

  /* denseB: sfB uses contiguous leaves and every leaf is connected, so leafdataB can be
     reused directly as the composite's remote points without compaction */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership transferred to the new SF below */
  } else {
    /* Compact: keep only leaves with a path back to an sfA root */
    PetscCall(PetscMalloc1(numLeavesBA,&localPointsBA));
    PetscCall(PetscMalloc1(numLeavesBA,&remotePointsBA));
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    PetscCall(PetscFree(leafdataB));
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  PetscCall(PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER));
  PetscFunctionReturn(0);
}

/*@
PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

Output Parameters:
. sfBA - The composite SF.

Level: developer

Notes:
Currently, the two SFs must be defined on congruent communicators and they must be true star
forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
a Reduce on sfB, on connected roots.

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op            op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool         iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  PetscCall(PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin));
  if (iswin) op = MPI_REPLACE;
#endif

  /* Lay sfA's remote points out over sfB's leaf range; rank/index -1 marks leaves of sfB
     that carry no sfA data */
  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA));
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* contiguous leaf storage in sfA */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  PetscCall(PetscMalloc1(numRootsB,&localPointsBA));
  PetscCall(PetscMalloc1(numRootsB,&remotePointsBA));
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Reduce over sfB: each sfB root receives the sfA root reachable through its leaf */
  PetscCall(PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op));
  PetscCall(PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op));
  PetscCall(PetscFree(reorderedRemotePointsA));
  /* Compact in place, dropping unconnected roots (rank still -1) */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  PetscCall(PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER));
  PetscFunctionReturn(0);
}

/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
. out - The local PetscSF
 */
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm           comm;
  PetscMPIInt        myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode       *liremote;
  PetscSF            lsf;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->CreateLocalSF) {
    /* type-specific fast path */
    PetscCall((*sf->ops->CreateLocalSF)(sf,out));
  } else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    PetscCall(PetscObjectGetComm((PetscObject)sf,&comm));
    PetscCallMPI(MPI_Comm_rank(comm,&myrank));

    /* Find out local edges and build a local SF */
    PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote));
    /* first pass: count leaves whose root lives on this rank */
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    PetscCall(PetscMalloc1(lnleaves,&lilocal));
    PetscCall(PetscMalloc1(lnleaves,&liremote));

    /* second pass: copy the intra-process edges */
    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    PetscCall(PetscSFCreate(PETSC_COMM_SELF,&lsf));
    PetscCall(PetscSFSetFromOptions(lsf));
    PetscCall(PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(lsf));
    *out = lsf;
  }
  PetscFunctionReturn(0);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscMemType rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0));
  PetscCall(PetscGetMemType(rootdata,&rootmtype));
  PetscCall(PetscGetMemType(leafdata,&leafmtype));
  if (sf->ops->BcastToZero) {
    PetscCall((*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata));
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0));
  PetscFunctionReturn(0);
}

/*@
PetscSFConcatenate - concatenate multiple SFs into one

Input Parameters:
+ comm - the communicator
. nsfs - the number of input PetscSF
. sfs - the array of input PetscSF
. shareRoots - the flag whether roots of input PetscSFs are taken as shared (PETSC_TRUE), or separate and concatenated (PETSC_FALSE)
- leafOffsets - the array of local leaf offsets, one for each input PetscSF, or NULL for contiguous storage

Output Parameters:
. newsf - The resulting PetscSF

Level: developer

Notes:
The communicator of all SFs in sfs must be comm.

The offsets in leafOffsets are added to the original leaf indices.

If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well.
In this case, NULL is also passed as ilocal to the resulting SF.

If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs.
In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs).

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscBool shareRoots, PetscInt leafOffsets[], PetscSF *newsf)
{
  PetscInt     i, s, nLeaves, nRoots;
  PetscInt    *leafArrayOffsets;
  PetscInt    *ilocal_new;
  PetscSFNode *iremote_new;
  PetscInt    *rootOffsets;
  PetscBool    all_ilocal_null = PETSC_FALSE;
  PetscMPIInt  rank;

  PetscFunctionBegin;
  {
    PetscSF dummy; /* just to have a PetscObject on comm for input validation */

    PetscCall(PetscSFCreate(comm,&dummy));
    PetscValidLogicalCollectiveInt(dummy,nsfs,2);
    PetscValidPointer(sfs,3);
    for (i=0; i<nsfs; i++) {
      PetscValidHeaderSpecific(sfs[i],PETSCSF_CLASSID,3);
      PetscCheckSameComm(dummy,1,sfs[i],3);
    }
    PetscValidLogicalCollectiveBool(dummy,shareRoots,4);
    if (leafOffsets) PetscValidIntPointer(leafOffsets,5);
    PetscValidPointer(newsf,6);
    PetscCall(PetscSFDestroy(&dummy));
  }
  if (!nsfs) {
    /* no inputs: return an empty SF */
    PetscCall(PetscSFCreate(comm, newsf));
    PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscFunctionReturn(0);
  }
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* Root numbering: with shareRoots the inputs reuse the same root space (sizes must agree),
     otherwise root spaces are stacked and rootOffsets records where each input starts */
  PetscCall(PetscCalloc1(nsfs+1, &rootOffsets));
  if (shareRoots) {
    PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
    if (PetscDefined(USE_DEBUG)) {
      for (s=1; s<nsfs; s++) {
        PetscInt nr;

        PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
        PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "shareRoots = PETSC_TRUE but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", s, nr, nRoots);
      }
    }
  } else {
    for (s=0; s<nsfs; s++) {
      PetscInt nr;

      PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
      rootOffsets[s+1] = rootOffsets[s] + nr;
    }
    nRoots = rootOffsets[nsfs];
  }

  /* Calculate leaf array offsets and automatic root offsets */
  PetscCall(PetscMalloc1(nsfs+1,&leafArrayOffsets));
  leafArrayOffsets[0] = 0;
  for (s=0; s<nsfs; s++) {
    PetscInt nl;

    PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
    leafArrayOffsets[s+1] = leafArrayOffsets[s] + nl;
  }
  nLeaves = leafArrayOffsets[nsfs];

  /* leafOffsets == NULL is only legal when every input uses contiguous leaf storage */
  if (!leafOffsets) {
    all_ilocal_null = PETSC_TRUE;
    for (s=0; s<nsfs; s++) {
      const PetscInt *ilocal;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
      if (ilocal) {
        all_ilocal_null = PETSC_FALSE;
        break;
      }
    }
    PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
  }

  /* Renumber and concatenate local leaves */
  ilocal_new = NULL;
  if (!all_ilocal_null) {
    PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
    for (i = 0; i<nLeaves; i++) ilocal_new[i] = -1;
    for (s = 0; s<nsfs; s++) {
      const PetscInt *ilocal;
      PetscInt       *ilocal_l = &ilocal_new[leafArrayOffsets[s]];
      PetscInt        i, nleaves_l;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
      for (i=0; i<nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
    }
  }

  /* Renumber and concatenate remote roots */
  PetscCall(PetscMalloc1(nLeaves, &iremote_new));
  for (i = 0; i < nLeaves; i++) {
    iremote_new[i].rank = -1;
    iremote_new[i].index = -1;
  }
  for (s = 0; s<nsfs; s++) {
    PetscInt           i, nl, nr;
    PetscSF            tmp_sf;
    const PetscSFNode *iremote;
    PetscSFNode       *tmp_rootdata;
    PetscSFNode       *tmp_leafdata = &iremote_new[leafArrayOffsets[s]];

    PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
    PetscCall(PetscSFCreate(comm, &tmp_sf));
    /* create helper SF with contiguous leaves */
    PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode*) iremote, PETSC_COPY_VALUES));
    PetscCall(PetscSFSetUp(tmp_sf));
    PetscCall(PetscMalloc1(nr, &tmp_rootdata));
    /* each root broadcasts its (owner rank, shifted index) so leaves learn their new root identity */
    for (i = 0; i < nr; i++) {
      tmp_rootdata[i].index = i + rootOffsets[s];
      tmp_rootdata[i].rank = (PetscInt) rank;
    }
    PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd( tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
    PetscCall(PetscSFDestroy(&tmp_sf));
    PetscCall(PetscFree(tmp_rootdata));
  }

  /* Build the new SF */
  PetscCall(PetscSFCreate(comm, newsf));
  PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetUp(*newsf));
  PetscCall(PetscFree(rootOffsets));
  PetscCall(PetscFree(leafArrayOffsets));
  PetscFunctionReturn(0);
}