1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/hashseti.h> 3 #include <petsc/private/viewerimpl.h> 4 #include <petscctable.h> 5 6 #if defined(PETSC_HAVE_CUDA) 7 #include <cuda_runtime.h> 8 #endif 9 10 #if defined(PETSC_HAVE_HIP) 11 #include <hip/hip_runtime.h> 12 #endif 13 14 #if defined(PETSC_CLANG_STATIC_ANALYZER) 15 void PetscSFCheckGraphSet(PetscSF,int); 16 #else 17 #if defined(PETSC_USE_DEBUG) 18 # define PetscSFCheckGraphSet(sf,arg) do { \ 19 if (PetscUnlikely(!(sf)->graphset)) \ 20 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \ 21 } while (0) 22 #else 23 # define PetscSFCheckGraphSet(sf,arg) do {} while (0) 24 #endif 25 #endif 26 27 const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL}; 28 29 /*@ 30 PetscSFCreate - create a star forest communication context 31 32 Collective 33 34 Input Parameter: 35 . comm - communicator on which the star forest will operate 36 37 Output Parameter: 38 . sf - new star forest context 39 40 Options Database Keys: 41 + -sf_type basic -Use MPI persistent Isend/Irecv for communication (Default) 42 . -sf_type window -Use MPI-3 one-sided window for communication 43 - -sf_type neighbor -Use MPI-3 neighborhood collectives for communication 44 45 Level: intermediate 46 47 Notes: 48 When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv, 49 MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special 50 SFs are optimized and they have better performance than general SFs. 
51 52 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy() 53 @*/ 54 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf) 55 { 56 PetscSF b; 57 58 PetscFunctionBegin; 59 PetscValidPointer(sf,2); 60 CHKERRQ(PetscSFInitializePackage()); 61 62 CHKERRQ(PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView)); 63 64 b->nroots = -1; 65 b->nleaves = -1; 66 b->minleaf = PETSC_MAX_INT; 67 b->maxleaf = PETSC_MIN_INT; 68 b->nranks = -1; 69 b->rankorder = PETSC_TRUE; 70 b->ingroup = MPI_GROUP_NULL; 71 b->outgroup = MPI_GROUP_NULL; 72 b->graphset = PETSC_FALSE; 73 #if defined(PETSC_HAVE_DEVICE) 74 b->use_gpu_aware_mpi = use_gpu_aware_mpi; 75 b->use_stream_aware_mpi = PETSC_FALSE; 76 b->unknown_input_stream= PETSC_FALSE; 77 #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 78 b->backend = PETSCSF_BACKEND_KOKKOS; 79 #elif defined(PETSC_HAVE_CUDA) 80 b->backend = PETSCSF_BACKEND_CUDA; 81 #elif defined(PETSC_HAVE_HIP) 82 b->backend = PETSCSF_BACKEND_HIP; 83 #endif 84 85 #if defined(PETSC_HAVE_NVSHMEM) 86 b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */ 87 b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */ 88 CHKERRQ(PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL)); 89 CHKERRQ(PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL)); 90 #endif 91 #endif 92 b->vscat.from_n = -1; 93 b->vscat.to_n = -1; 94 b->vscat.unit = MPIU_SCALAR; 95 *sf = b; 96 PetscFunctionReturn(0); 97 } 98 99 /*@ 100 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 101 102 Collective 103 104 Input Parameter: 105 . 
sf - star forest 106 107 Level: advanced 108 109 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy() 110 @*/ 111 PetscErrorCode PetscSFReset(PetscSF sf) 112 { 113 PetscFunctionBegin; 114 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 115 if (sf->ops->Reset) CHKERRQ((*sf->ops->Reset)(sf)); 116 sf->nroots = -1; 117 sf->nleaves = -1; 118 sf->minleaf = PETSC_MAX_INT; 119 sf->maxleaf = PETSC_MIN_INT; 120 sf->mine = NULL; 121 sf->remote = NULL; 122 sf->graphset = PETSC_FALSE; 123 CHKERRQ(PetscFree(sf->mine_alloc)); 124 CHKERRQ(PetscFree(sf->remote_alloc)); 125 sf->nranks = -1; 126 CHKERRQ(PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote)); 127 sf->degreeknown = PETSC_FALSE; 128 CHKERRQ(PetscFree(sf->degree)); 129 if (sf->ingroup != MPI_GROUP_NULL) CHKERRMPI(MPI_Group_free(&sf->ingroup)); 130 if (sf->outgroup != MPI_GROUP_NULL) CHKERRMPI(MPI_Group_free(&sf->outgroup)); 131 if (sf->multi) sf->multi->multi = NULL; 132 CHKERRQ(PetscSFDestroy(&sf->multi)); 133 CHKERRQ(PetscLayoutDestroy(&sf->map)); 134 135 #if defined(PETSC_HAVE_DEVICE) 136 for (PetscInt i=0; i<2; i++) CHKERRQ(PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i])); 137 #endif 138 139 sf->setupcalled = PETSC_FALSE; 140 PetscFunctionReturn(0); 141 } 142 143 /*@C 144 PetscSFSetType - Set the PetscSF communication implementation 145 146 Collective on PetscSF 147 148 Input Parameters: 149 + sf - the PetscSF context 150 - type - a known method 151 152 Options Database Key: 153 . 
-sf_type <type> - Sets the method; use -help for a list 154 of available methods (for instance, window, basic, neighbor) 155 156 Notes: 157 See "include/petscsf.h" for available methods (for instance) 158 + PETSCSFWINDOW - MPI-2/3 one-sided 159 - PETSCSFBASIC - basic implementation using MPI-1 two-sided 160 161 Level: intermediate 162 163 .seealso: PetscSFType, PetscSFCreate() 164 @*/ 165 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type) 166 { 167 PetscBool match; 168 PetscErrorCode (*r)(PetscSF); 169 170 PetscFunctionBegin; 171 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 172 PetscValidCharPointer(type,2); 173 174 CHKERRQ(PetscObjectTypeCompare((PetscObject)sf,type,&match)); 175 if (match) PetscFunctionReturn(0); 176 177 CHKERRQ(PetscFunctionListFind(PetscSFList,type,&r)); 178 PetscCheckFalse(!r,PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type); 179 /* Destroy the previous PetscSF implementation context */ 180 if (sf->ops->Destroy) CHKERRQ((*(sf)->ops->Destroy)(sf)); 181 CHKERRQ(PetscMemzero(sf->ops,sizeof(*sf->ops))); 182 CHKERRQ(PetscObjectChangeTypeName((PetscObject)sf,type)); 183 CHKERRQ((*r)(sf)); 184 PetscFunctionReturn(0); 185 } 186 187 /*@C 188 PetscSFGetType - Get the PetscSF communication implementation 189 190 Not Collective 191 192 Input Parameter: 193 . sf - the PetscSF context 194 195 Output Parameter: 196 . type - the PetscSF type name 197 198 Level: intermediate 199 200 .seealso: PetscSFSetType(), PetscSFCreate() 201 @*/ 202 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 203 { 204 PetscFunctionBegin; 205 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1); 206 PetscValidPointer(type,2); 207 *type = ((PetscObject)sf)->type_name; 208 PetscFunctionReturn(0); 209 } 210 211 /*@C 212 PetscSFDestroy - destroy star forest 213 214 Collective 215 216 Input Parameter: 217 . 
sf - address of star forest 218 219 Level: intermediate 220 221 .seealso: PetscSFCreate(), PetscSFReset() 222 @*/ 223 PetscErrorCode PetscSFDestroy(PetscSF *sf) 224 { 225 PetscFunctionBegin; 226 if (!*sf) PetscFunctionReturn(0); 227 PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1); 228 if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);} 229 CHKERRQ(PetscSFReset(*sf)); 230 if ((*sf)->ops->Destroy) CHKERRQ((*(*sf)->ops->Destroy)(*sf)); 231 CHKERRQ(PetscSFDestroy(&(*sf)->vscat.lsf)); 232 if ((*sf)->vscat.bs > 1) CHKERRMPI(MPI_Type_free(&(*sf)->vscat.unit)); 233 CHKERRQ(PetscHeaderDestroy(sf)); 234 PetscFunctionReturn(0); 235 } 236 237 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 238 { 239 PetscInt i, nleaves; 240 PetscMPIInt size; 241 const PetscInt *ilocal; 242 const PetscSFNode *iremote; 243 244 PetscFunctionBegin; 245 if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0); 246 CHKERRQ(PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote)); 247 CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size)); 248 for (i = 0; i < nleaves; i++) { 249 const PetscInt rank = iremote[i].rank; 250 const PetscInt remote = iremote[i].index; 251 const PetscInt leaf = ilocal ? ilocal[i] : i; 252 PetscCheckFalse(rank < 0 || rank >= size,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)",rank,i,size); 253 PetscCheckFalse(remote < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0",remote,i); 254 PetscCheckFalse(leaf < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0",leaf,i); 255 } 256 PetscFunctionReturn(0); 257 } 258 259 /*@ 260 PetscSFSetUp - set up communication structures 261 262 Collective 263 264 Input Parameter: 265 . 
sf - star forest communication object 266 267 Level: beginner 268 269 .seealso: PetscSFSetFromOptions(), PetscSFSetType() 270 @*/ 271 PetscErrorCode PetscSFSetUp(PetscSF sf) 272 { 273 PetscFunctionBegin; 274 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 275 PetscSFCheckGraphSet(sf,1); 276 if (sf->setupcalled) PetscFunctionReturn(0); 277 CHKERRQ(PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0)); 278 CHKERRQ(PetscSFCheckGraphValid_Private(sf)); 279 if (!((PetscObject)sf)->type_name) CHKERRQ(PetscSFSetType(sf,PETSCSFBASIC)); /* Zero all sf->ops */ 280 if (sf->ops->SetUp) CHKERRQ((*sf->ops->SetUp)(sf)); 281 #if defined(PETSC_HAVE_CUDA) 282 if (sf->backend == PETSCSF_BACKEND_CUDA) { 283 sf->ops->Malloc = PetscSFMalloc_CUDA; 284 sf->ops->Free = PetscSFFree_CUDA; 285 } 286 #endif 287 #if defined(PETSC_HAVE_HIP) 288 if (sf->backend == PETSCSF_BACKEND_HIP) { 289 sf->ops->Malloc = PetscSFMalloc_HIP; 290 sf->ops->Free = PetscSFFree_HIP; 291 } 292 #endif 293 294 # 295 #if defined(PETSC_HAVE_KOKKOS) 296 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 297 sf->ops->Malloc = PetscSFMalloc_Kokkos; 298 sf->ops->Free = PetscSFFree_Kokkos; 299 } 300 #endif 301 CHKERRQ(PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0)); 302 sf->setupcalled = PETSC_TRUE; 303 PetscFunctionReturn(0); 304 } 305 306 /*@ 307 PetscSFSetFromOptions - set PetscSF options using the options database 308 309 Logically Collective 310 311 Input Parameter: 312 . sf - star forest 313 314 Options Database Keys: 315 + -sf_type - implementation type, see PetscSFSetType() 316 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 317 . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also 318 use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true). 319 If true, this option only works with -use_gpu_aware_mpi 1. 320 . 
-sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false). 321 If true, this option only works with -use_gpu_aware_mpi 1. 322 323 - -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos. 324 On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices, 325 the only available is kokkos. 326 327 Level: intermediate 328 @*/ 329 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 330 { 331 PetscSFType deft; 332 char type[256]; 333 PetscErrorCode ierr; 334 PetscBool flg; 335 336 PetscFunctionBegin; 337 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 338 ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr); 339 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 340 CHKERRQ(PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg)); 341 CHKERRQ(PetscSFSetType(sf,flg ? 
type : deft)); 342 CHKERRQ(PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL)); 343 #if defined(PETSC_HAVE_DEVICE) 344 { 345 char backendstr[32] = {0}; 346 PetscBool isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set; 347 /* Change the defaults set in PetscSFCreate() with command line options */ 348 CHKERRQ(PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL)); 349 CHKERRQ(PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL)); 350 CHKERRQ(PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set)); 351 CHKERRQ(PetscStrcasecmp("cuda",backendstr,&isCuda)); 352 CHKERRQ(PetscStrcasecmp("kokkos",backendstr,&isKokkos)); 353 CHKERRQ(PetscStrcasecmp("hip",backendstr,&isHip)); 354 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 355 if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 356 else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 357 else if (isHip) sf->backend = PETSCSF_BACKEND_HIP; 358 else PetscCheckFalse(set,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr); 359 #elif defined(PETSC_HAVE_KOKKOS) 360 PetscCheckFalse(set && !isKokkos,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. 
You can only choose kokkos", backendstr); 361 #endif 362 } 363 #endif 364 if (sf->ops->SetFromOptions) CHKERRQ((*sf->ops->SetFromOptions)(PetscOptionsObject,sf)); 365 ierr = PetscOptionsEnd();CHKERRQ(ierr); 366 PetscFunctionReturn(0); 367 } 368 369 /*@ 370 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 371 372 Logically Collective 373 374 Input Parameters: 375 + sf - star forest 376 - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic) 377 378 Level: advanced 379 380 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin() 381 @*/ 382 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg) 383 { 384 PetscFunctionBegin; 385 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 386 PetscValidLogicalCollectiveBool(sf,flg,2); 387 PetscCheckFalse(sf->multi,PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 388 sf->rankorder = flg; 389 PetscFunctionReturn(0); 390 } 391 392 /*@C 393 PetscSFSetGraph - Set a parallel star forest 394 395 Collective 396 397 Input Parameters: 398 + sf - star forest 399 . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 400 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 401 . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced 402 during setup in debug mode) 403 . localmode - copy mode for ilocal 404 . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 405 during setup in debug mode) 406 - remotemode - copy mode for iremote 407 408 Level: intermediate 409 410 Notes: 411 Leaf indices in ilocal must be unique, otherwise an error occurs. 412 413 Input arrays ilocal and iremote follow the PetscCopyMode semantics. 
414 In particular, if localmode/remotemode is PETSC_OWN_POINTER or PETSC_USE_POINTER, 415 PETSc might modify the respective array; 416 if PETSC_USE_POINTER, the user must delete the array after PetscSFDestroy(). 417 Only if PETSC_COPY_VALUES is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed). 418 419 Fortran Notes: 420 In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode. 421 422 Developer Notes: 423 We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf. 424 This also allows to compare leaf sets of two SFs easily. 425 426 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph() 427 @*/ 428 PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,PetscInt *ilocal,PetscCopyMode localmode,PetscSFNode *iremote,PetscCopyMode remotemode) 429 { 430 PetscBool unique, contiguous; 431 432 PetscFunctionBegin; 433 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 434 if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4); 435 if (nleaves > 0) PetscValidPointer(iremote,6); 436 PetscCheckFalse(nroots < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %" PetscInt_FMT ", cannot be negative",nroots); 437 PetscCheckFalse(nleaves < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %" PetscInt_FMT ", cannot be negative",nleaves); 438 PetscCheck(localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong localmode %d",localmode); 439 PetscCheck(remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong remotemode %d",remotemode); 440 441 if (sf->nroots >= 0) { /* Reset only if graph already set */ 442 CHKERRQ(PetscSFReset(sf)); 443 } 444 445 CHKERRQ(PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0)); 446 447 sf->nroots = nroots; 448 sf->nleaves = nleaves; 449 450 if (localmode == PETSC_COPY_VALUES && ilocal) { 451 PetscInt *tlocal = 
NULL; 452 453 CHKERRQ(PetscMalloc1(nleaves,&tlocal)); 454 CHKERRQ(PetscArraycpy(tlocal,ilocal,nleaves)); 455 ilocal = tlocal; 456 } 457 if (remotemode == PETSC_COPY_VALUES) { 458 PetscSFNode *tremote = NULL; 459 460 CHKERRQ(PetscMalloc1(nleaves,&tremote)); 461 CHKERRQ(PetscArraycpy(tremote,iremote,nleaves)); 462 iremote = tremote; 463 } 464 465 if (nleaves && ilocal) { 466 PetscSFNode work; 467 468 CHKERRQ(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work)); 469 CHKERRQ(PetscSortedCheckDupsInt(nleaves, ilocal, &unique)); 470 unique = PetscNot(unique); 471 PetscCheck(sf->allow_multi_leaves || unique,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Input ilocal has duplicate entries which is not allowed for this PetscSF"); 472 sf->minleaf = ilocal[0]; 473 sf->maxleaf = ilocal[nleaves-1]; 474 contiguous = (PetscBool) (unique && ilocal[0] == 0 && ilocal[nleaves-1] == nleaves-1); 475 } else { 476 sf->minleaf = 0; 477 sf->maxleaf = nleaves - 1; 478 unique = PETSC_TRUE; 479 contiguous = PETSC_TRUE; 480 } 481 482 if (contiguous) { 483 if (localmode == PETSC_USE_POINTER) { 484 ilocal = NULL; 485 } else { 486 CHKERRQ(PetscFree(ilocal)); 487 } 488 } 489 sf->mine = ilocal; 490 if (localmode == PETSC_USE_POINTER) { 491 sf->mine_alloc = NULL; 492 } else { 493 sf->mine_alloc = ilocal; 494 } 495 sf->remote = iremote; 496 if (remotemode == PETSC_USE_POINTER) { 497 sf->remote_alloc = NULL; 498 } else { 499 sf->remote_alloc = iremote; 500 } 501 CHKERRQ(PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0)); 502 sf->graphset = PETSC_TRUE; 503 PetscFunctionReturn(0); 504 } 505 506 /*@ 507 PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern 508 509 Collective 510 511 Input Parameters: 512 + sf - The PetscSF 513 . 
map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL) 514 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL 515 516 Notes: 517 It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map. 518 n and N are local and global sizes of x respectively. 519 520 With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to 521 sequential vectors y on all ranks. 522 523 With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a 524 sequential vector y on rank 0. 525 526 In above cases, entries of x are roots and entries of y are leaves. 527 528 With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine 529 creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 530 of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does 531 not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data 532 items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines. 533 534 In this case, roots and leaves are symmetric. 
535 536 Level: intermediate 537 @*/ 538 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern) 539 { 540 MPI_Comm comm; 541 PetscInt n,N,res[2]; 542 PetscMPIInt rank,size; 543 PetscSFType type; 544 545 PetscFunctionBegin; 546 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 547 if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map,2); 548 CHKERRQ(PetscObjectGetComm((PetscObject)sf, &comm)); 549 PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL,comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %d",pattern); 550 CHKERRMPI(MPI_Comm_rank(comm,&rank)); 551 CHKERRMPI(MPI_Comm_size(comm,&size)); 552 553 if (pattern == PETSCSF_PATTERN_ALLTOALL) { 554 type = PETSCSFALLTOALL; 555 CHKERRQ(PetscLayoutCreate(comm,&sf->map)); 556 CHKERRQ(PetscLayoutSetLocalSize(sf->map,size)); 557 CHKERRQ(PetscLayoutSetSize(sf->map,((PetscInt)size)*size)); 558 CHKERRQ(PetscLayoutSetUp(sf->map)); 559 } else { 560 CHKERRQ(PetscLayoutGetLocalSize(map,&n)); 561 CHKERRQ(PetscLayoutGetSize(map,&N)); 562 res[0] = n; 563 res[1] = -n; 564 /* Check if n are same over all ranks so that we can optimize it */ 565 CHKERRMPI(MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm)); 566 if (res[0] == -res[1]) { /* same n */ 567 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 568 } else { 569 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 570 } 571 CHKERRQ(PetscLayoutReference(map,&sf->map)); 572 } 573 CHKERRQ(PetscSFSetType(sf,type)); 574 575 sf->pattern = pattern; 576 sf->mine = NULL; /* Contiguous */ 577 578 /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 579 Also set other easy stuff. 
580 */ 581 if (pattern == PETSCSF_PATTERN_ALLGATHER) { 582 sf->nleaves = N; 583 sf->nroots = n; 584 sf->nranks = size; 585 sf->minleaf = 0; 586 sf->maxleaf = N - 1; 587 } else if (pattern == PETSCSF_PATTERN_GATHER) { 588 sf->nleaves = rank ? 0 : N; 589 sf->nroots = n; 590 sf->nranks = rank ? 0 : size; 591 sf->minleaf = 0; 592 sf->maxleaf = rank ? -1 : N - 1; 593 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 594 sf->nleaves = size; 595 sf->nroots = size; 596 sf->nranks = size; 597 sf->minleaf = 0; 598 sf->maxleaf = size - 1; 599 } 600 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 601 sf->graphset = PETSC_TRUE; 602 PetscFunctionReturn(0); 603 } 604 605 /*@ 606 PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map 607 608 Collective 609 610 Input Parameter: 611 . sf - star forest to invert 612 613 Output Parameter: 614 . isf - inverse of sf 615 616 Level: advanced 617 618 Notes: 619 All roots must have degree 1. 620 621 The local space may be a permutation, but cannot be sparse. 
622 623 .seealso: PetscSFSetGraph() 624 @*/ 625 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf) 626 { 627 PetscMPIInt rank; 628 PetscInt i,nroots,nleaves,maxlocal,count,*newilocal; 629 const PetscInt *ilocal; 630 PetscSFNode *roots,*leaves; 631 632 PetscFunctionBegin; 633 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 634 PetscSFCheckGraphSet(sf,1); 635 PetscValidPointer(isf,2); 636 637 CHKERRQ(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL)); 638 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 639 640 CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank)); 641 CHKERRQ(PetscMalloc2(nroots,&roots,maxlocal,&leaves)); 642 for (i=0; i<maxlocal; i++) { 643 leaves[i].rank = rank; 644 leaves[i].index = i; 645 } 646 for (i=0; i <nroots; i++) { 647 roots[i].rank = -1; 648 roots[i].index = -1; 649 } 650 CHKERRQ(PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE)); 651 CHKERRQ(PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE)); 652 653 /* Check whether our leaves are sparse */ 654 for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++; 655 if (count == nroots) newilocal = NULL; 656 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). 
*/ 657 CHKERRQ(PetscMalloc1(count,&newilocal)); 658 for (i=0,count=0; i<nroots; i++) { 659 if (roots[i].rank >= 0) { 660 newilocal[count] = i; 661 roots[count].rank = roots[i].rank; 662 roots[count].index = roots[i].index; 663 count++; 664 } 665 } 666 } 667 668 CHKERRQ(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf)); 669 CHKERRQ(PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES)); 670 CHKERRQ(PetscFree2(roots,leaves)); 671 PetscFunctionReturn(0); 672 } 673 674 /*@ 675 PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph 676 677 Collective 678 679 Input Parameters: 680 + sf - communication object to duplicate 681 - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption) 682 683 Output Parameter: 684 . newsf - new communication object 685 686 Level: beginner 687 688 .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph() 689 @*/ 690 PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf) 691 { 692 PetscSFType type; 693 MPI_Datatype dtype=MPIU_SCALAR; 694 695 PetscFunctionBegin; 696 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 697 PetscValidLogicalCollectiveEnum(sf,opt,2); 698 PetscValidPointer(newsf,3); 699 CHKERRQ(PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf)); 700 CHKERRQ(PetscSFGetType(sf,&type)); 701 if (type) CHKERRQ(PetscSFSetType(*newsf,type)); 702 if (opt == PETSCSF_DUPLICATE_GRAPH) { 703 PetscSFCheckGraphSet(sf,1); 704 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 705 PetscInt nroots,nleaves; 706 const PetscInt *ilocal; 707 const PetscSFNode *iremote; 708 CHKERRQ(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote)); 709 CHKERRQ(PetscSFSetGraph(*newsf,nroots,nleaves,(PetscInt*)ilocal,PETSC_COPY_VALUES,(PetscSFNode*)iremote,PETSC_COPY_VALUES)); 710 } else { 711 CHKERRQ(PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern)); 712 } 713 } 714 /* Since oldtype is committed, so 
is newtype, according to MPI */ 715 if (sf->vscat.bs > 1) CHKERRMPI(MPI_Type_dup(sf->vscat.unit,&dtype)); 716 (*newsf)->vscat.bs = sf->vscat.bs; 717 (*newsf)->vscat.unit = dtype; 718 (*newsf)->vscat.to_n = sf->vscat.to_n; 719 (*newsf)->vscat.from_n = sf->vscat.from_n; 720 /* Do not copy lsf. Build it on demand since it is rarely used */ 721 722 #if defined(PETSC_HAVE_DEVICE) 723 (*newsf)->backend = sf->backend; 724 (*newsf)->unknown_input_stream= sf->unknown_input_stream; 725 (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi; 726 (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi; 727 #endif 728 if (sf->ops->Duplicate) CHKERRQ((*sf->ops->Duplicate)(sf,opt,*newsf)); 729 /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */ 730 PetscFunctionReturn(0); 731 } 732 733 /*@C 734 PetscSFGetGraph - Get the graph specifying a parallel star forest 735 736 Not Collective 737 738 Input Parameter: 739 . sf - star forest 740 741 Output Parameters: 742 + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 743 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 744 . ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage) 745 - iremote - remote locations of root vertices for each leaf on the current process 746 747 Notes: 748 We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet 749 750 The returned ilocal/iremote might contain values in different order than the input ones in PetscSFSetGraph(), 751 see its manpage for details. 752 753 Fortran Notes: 754 The returned iremote array is a copy and must be deallocated after use. Consequently, if you 755 want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array. 
756 757 To check for a NULL ilocal use 758 $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then 759 760 Level: intermediate 761 762 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph() 763 @*/ 764 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 765 { 766 PetscFunctionBegin; 767 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 768 if (sf->ops->GetGraph) { 769 CHKERRQ((sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote)); 770 } else { 771 if (nroots) *nroots = sf->nroots; 772 if (nleaves) *nleaves = sf->nleaves; 773 if (ilocal) *ilocal = sf->mine; 774 if (iremote) *iremote = sf->remote; 775 } 776 PetscFunctionReturn(0); 777 } 778 779 /*@ 780 PetscSFGetLeafRange - Get the active leaf ranges 781 782 Not Collective 783 784 Input Parameter: 785 . sf - star forest 786 787 Output Parameters: 788 + minleaf - minimum active leaf on this process. Return 0 if there are no leaves. 789 - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves. 790 791 Level: developer 792 793 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph() 794 @*/ 795 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf) 796 { 797 PetscFunctionBegin; 798 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 799 PetscSFCheckGraphSet(sf,1); 800 if (minleaf) *minleaf = sf->minleaf; 801 if (maxleaf) *maxleaf = sf->maxleaf; 802 PetscFunctionReturn(0); 803 } 804 805 /*@C 806 PetscSFViewFromOptions - View from Options 807 808 Collective on PetscSF 809 810 Input Parameters: 811 + A - the star forest 812 . 
obj - Optional object 813 - name - command line option 814 815 Level: intermediate 816 .seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate() 817 @*/ 818 PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[]) 819 { 820 PetscFunctionBegin; 821 PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1); 822 CHKERRQ(PetscObjectViewFromOptions((PetscObject)A,obj,name)); 823 PetscFunctionReturn(0); 824 } 825 826 /*@C 827 PetscSFView - view a star forest 828 829 Collective 830 831 Input Parameters: 832 + sf - star forest 833 - viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD 834 835 Level: beginner 836 837 .seealso: PetscSFCreate(), PetscSFSetGraph() 838 @*/ 839 PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer) 840 { 841 PetscBool iascii; 842 PetscViewerFormat format; 843 844 PetscFunctionBegin; 845 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 846 if (!viewer) CHKERRQ(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer)); 847 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 848 PetscCheckSameComm(sf,1,viewer,2); 849 if (sf->graphset) CHKERRQ(PetscSFSetUp(sf)); 850 CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 851 if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) { 852 PetscMPIInt rank; 853 PetscInt ii,i,j; 854 855 CHKERRQ(PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer)); 856 CHKERRQ(PetscViewerASCIIPushTab(viewer)); 857 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 858 if (!sf->graphset) { 859 CHKERRQ(PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n")); 860 CHKERRQ(PetscViewerASCIIPopTab(viewer)); 861 PetscFunctionReturn(0); 862 } 863 CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank)); 864 CHKERRQ(PetscViewerASCIIPushSynchronized(viewer)); 865 CHKERRQ(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" 
PetscInt_FMT "\n",rank,sf->nroots,sf->nleaves,sf->nranks)); 866 for (i=0; i<sf->nleaves; i++) { 867 CHKERRQ(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index)); 868 } 869 CHKERRQ(PetscViewerFlush(viewer)); 870 CHKERRQ(PetscViewerGetFormat(viewer,&format)); 871 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 872 PetscMPIInt *tmpranks,*perm; 873 CHKERRQ(PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm)); 874 CHKERRQ(PetscArraycpy(tmpranks,sf->ranks,sf->nranks)); 875 for (i=0; i<sf->nranks; i++) perm[i] = i; 876 CHKERRQ(PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm)); 877 CHKERRQ(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank)); 878 for (ii=0; ii<sf->nranks; ii++) { 879 i = perm[ii]; 880 CHKERRQ(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %" PetscInt_FMT " edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i])); 881 for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) { 882 CHKERRQ(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n",rank,sf->rmine[j],sf->rremote[j])); 883 } 884 } 885 CHKERRQ(PetscFree2(tmpranks,perm)); 886 } 887 CHKERRQ(PetscViewerFlush(viewer)); 888 CHKERRQ(PetscViewerASCIIPopSynchronized(viewer)); 889 } 890 CHKERRQ(PetscViewerASCIIPopTab(viewer)); 891 } 892 if (sf->ops->View) CHKERRQ((*sf->ops->View)(sf,viewer)); 893 PetscFunctionReturn(0); 894 } 895 896 /*@C 897 PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process 898 899 Not Collective 900 901 Input Parameter: 902 . sf - star forest 903 904 Output Parameters: 905 + nranks - number of ranks referenced by local part 906 . ranks - array of ranks 907 . roffset - offset in rmine/rremote for each rank (length nranks+1) 908 . 
rmine - concatenated array holding local indices referencing each remote rank
-  rremote - concatenated array holding remote indices referenced for each remote rank

   Level: developer

.seealso: PetscSFGetLeafRanks()
@*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCheckFalse(!sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (!sf->ops->GetRootRanks) {
    /* Default: hand out the arrays computed by PetscSFSetUpRanks(); all outputs are optional */
    if (nranks)  *nranks  = sf->nranks;
    if (ranks)   *ranks   = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine)   *rmine   = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  } else {
    CHKERRQ((*sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote));
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

   Not Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  niranks - number of leaf ranks referencing roots on this process
.  iranks - array of ranks
.
ioffset - offset in irootloc for each rank (length niranks+1) 945 - irootloc - concatenated array holding local indices of roots referenced by each leaf rank 946 947 Level: developer 948 949 .seealso: PetscSFGetRootRanks() 950 @*/ 951 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 952 { 953 PetscFunctionBegin; 954 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 955 PetscCheckFalse(!sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 956 if (sf->ops->GetLeafRanks) { 957 CHKERRQ((sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc)); 958 } else { 959 PetscSFType type; 960 CHKERRQ(PetscSFGetType(sf,&type)); 961 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 962 } 963 PetscFunctionReturn(0); 964 } 965 966 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) { 967 PetscInt i; 968 for (i=0; i<n; i++) { 969 if (needle == list[i]) return PETSC_TRUE; 970 } 971 return PETSC_FALSE; 972 } 973 974 /*@C 975 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations. 
976 977 Collective 978 979 Input Parameters: 980 + sf - PetscSF to set up; PetscSFSetGraph() must have been called 981 - dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange) 982 983 Level: developer 984 985 .seealso: PetscSFGetRootRanks() 986 @*/ 987 PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup) 988 { 989 PetscTable table; 990 PetscTablePosition pos; 991 PetscMPIInt size,groupsize,*groupranks; 992 PetscInt *rcount,*ranks; 993 PetscInt i, irank = -1,orank = -1; 994 995 PetscFunctionBegin; 996 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 997 PetscSFCheckGraphSet(sf,1); 998 CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size)); 999 CHKERRQ(PetscTableCreate(10,size,&table)); 1000 for (i=0; i<sf->nleaves; i++) { 1001 /* Log 1-based rank */ 1002 CHKERRQ(PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES)); 1003 } 1004 CHKERRQ(PetscTableGetCount(table,&sf->nranks)); 1005 CHKERRQ(PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote)); 1006 CHKERRQ(PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks)); 1007 CHKERRQ(PetscTableGetHeadPosition(table,&pos)); 1008 for (i=0; i<sf->nranks; i++) { 1009 CHKERRQ(PetscTableGetNext(table,&pos,&ranks[i],&rcount[i])); 1010 ranks[i]--; /* Convert back to 0-based */ 1011 } 1012 CHKERRQ(PetscTableDestroy(&table)); 1013 1014 /* We expect that dgroup is reliably "small" while nranks could be large */ 1015 { 1016 MPI_Group group = MPI_GROUP_NULL; 1017 PetscMPIInt *dgroupranks; 1018 CHKERRMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group)); 1019 CHKERRMPI(MPI_Group_size(dgroup,&groupsize)); 1020 CHKERRQ(PetscMalloc1(groupsize,&dgroupranks)); 1021 CHKERRQ(PetscMalloc1(groupsize,&groupranks)); 1022 for (i=0; i<groupsize; i++) dgroupranks[i] = i; 1023 if (groupsize) CHKERRMPI(MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks)); 1024 CHKERRMPI(MPI_Group_free(&group)); 1025 
CHKERRQ(PetscFree(dgroupranks)); 1026 } 1027 1028 /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */ 1029 for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) { 1030 for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */ 1031 if (InList(ranks[i],groupsize,groupranks)) break; 1032 } 1033 for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */ 1034 if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break; 1035 } 1036 if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */ 1037 PetscInt tmprank,tmpcount; 1038 1039 tmprank = ranks[i]; 1040 tmpcount = rcount[i]; 1041 ranks[i] = ranks[sf->ndranks]; 1042 rcount[i] = rcount[sf->ndranks]; 1043 ranks[sf->ndranks] = tmprank; 1044 rcount[sf->ndranks] = tmpcount; 1045 sf->ndranks++; 1046 } 1047 } 1048 CHKERRQ(PetscFree(groupranks)); 1049 CHKERRQ(PetscSortIntWithArray(sf->ndranks,ranks,rcount)); 1050 CHKERRQ(PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks)); 1051 sf->roffset[0] = 0; 1052 for (i=0; i<sf->nranks; i++) { 1053 CHKERRQ(PetscMPIIntCast(ranks[i],sf->ranks+i)); 1054 sf->roffset[i+1] = sf->roffset[i] + rcount[i]; 1055 rcount[i] = 0; 1056 } 1057 for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) { 1058 /* short circuit */ 1059 if (orank != sf->remote[i].rank) { 1060 /* Search for index of iremote[i].rank in sf->ranks */ 1061 CHKERRQ(PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank)); 1062 if (irank < 0) { 1063 CHKERRQ(PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank)); 1064 if (irank >= 0) irank += sf->ndranks; 1065 } 1066 orank = sf->remote[i].rank; 1067 } 1068 PetscCheckFalse(irank < 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %" PetscInt_FMT " in array",sf->remote[i].rank); 1069 sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? 
sf->mine[i] : i; 1070 sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index; 1071 rcount[irank]++; 1072 } 1073 CHKERRQ(PetscFree2(rcount,ranks)); 1074 PetscFunctionReturn(0); 1075 } 1076 1077 /*@C 1078 PetscSFGetGroups - gets incoming and outgoing process groups 1079 1080 Collective 1081 1082 Input Parameter: 1083 . sf - star forest 1084 1085 Output Parameters: 1086 + incoming - group of origin processes for incoming edges (leaves that reference my roots) 1087 - outgoing - group of destination processes for outgoing edges (roots that I reference) 1088 1089 Level: developer 1090 1091 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow() 1092 @*/ 1093 PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing) 1094 { 1095 MPI_Group group = MPI_GROUP_NULL; 1096 1097 PetscFunctionBegin; 1098 PetscCheckFalse(sf->nranks < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups"); 1099 if (sf->ingroup == MPI_GROUP_NULL) { 1100 PetscInt i; 1101 const PetscInt *indegree; 1102 PetscMPIInt rank,*outranks,*inranks; 1103 PetscSFNode *remote; 1104 PetscSF bgcount; 1105 1106 /* Compute the number of incoming ranks */ 1107 CHKERRQ(PetscMalloc1(sf->nranks,&remote)); 1108 for (i=0; i<sf->nranks; i++) { 1109 remote[i].rank = sf->ranks[i]; 1110 remote[i].index = 0; 1111 } 1112 CHKERRQ(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount)); 1113 CHKERRQ(PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER)); 1114 CHKERRQ(PetscSFComputeDegreeBegin(bgcount,&indegree)); 1115 CHKERRQ(PetscSFComputeDegreeEnd(bgcount,&indegree)); 1116 /* Enumerate the incoming ranks */ 1117 CHKERRQ(PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks)); 1118 CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank)); 1119 for (i=0; i<sf->nranks; i++) outranks[i] = rank; 1120 CHKERRQ(PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks)); 1121 
CHKERRQ(PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks)); 1122 CHKERRMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group)); 1123 CHKERRMPI(MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup)); 1124 CHKERRMPI(MPI_Group_free(&group)); 1125 CHKERRQ(PetscFree2(inranks,outranks)); 1126 CHKERRQ(PetscSFDestroy(&bgcount)); 1127 } 1128 *incoming = sf->ingroup; 1129 1130 if (sf->outgroup == MPI_GROUP_NULL) { 1131 CHKERRMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group)); 1132 CHKERRMPI(MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup)); 1133 CHKERRMPI(MPI_Group_free(&group)); 1134 } 1135 *outgoing = sf->outgroup; 1136 PetscFunctionReturn(0); 1137 } 1138 1139 /*@ 1140 PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters 1141 1142 Collective 1143 1144 Input Parameter: 1145 . sf - star forest that may contain roots with 0 or with more than 1 vertex 1146 1147 Output Parameter: 1148 . multi - star forest with split roots, such that each root has degree exactly 1 1149 1150 Level: developer 1151 1152 Notes: 1153 1154 In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi 1155 directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming 1156 edge, it is a candidate for future optimization that might involve its removal. 1157 1158 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering() 1159 @*/ 1160 PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi) 1161 { 1162 PetscFunctionBegin; 1163 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1164 PetscValidPointer(multi,2); 1165 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? 
*/ 1166 CHKERRQ(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi)); 1167 *multi = sf->multi; 1168 sf->multi->multi = sf->multi; 1169 PetscFunctionReturn(0); 1170 } 1171 if (!sf->multi) { 1172 const PetscInt *indegree; 1173 PetscInt i,*inoffset,*outones,*outoffset,maxlocal; 1174 PetscSFNode *remote; 1175 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 1176 CHKERRQ(PetscSFComputeDegreeBegin(sf,&indegree)); 1177 CHKERRQ(PetscSFComputeDegreeEnd(sf,&indegree)); 1178 CHKERRQ(PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset)); 1179 inoffset[0] = 0; 1180 for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i]; 1181 for (i=0; i<maxlocal; i++) outones[i] = 1; 1182 CHKERRQ(PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM)); 1183 CHKERRQ(PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM)); 1184 for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1185 if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */ 1186 for (i=0; i<sf->nroots; i++) { 1187 PetscCheckFalse(inoffset[i] + indegree[i] != inoffset[i+1],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp"); 1188 } 1189 } 1190 CHKERRQ(PetscMalloc1(sf->nleaves,&remote)); 1191 for (i=0; i<sf->nleaves; i++) { 1192 remote[i].rank = sf->remote[i].rank; 1193 remote[i].index = outoffset[sf->mine ? 
sf->mine[i] : i]; 1194 } 1195 CHKERRQ(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi)); 1196 sf->multi->multi = sf->multi; 1197 CHKERRQ(PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER)); 1198 if (sf->rankorder) { /* Sort the ranks */ 1199 PetscMPIInt rank; 1200 PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree; 1201 PetscSFNode *newremote; 1202 CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank)); 1203 for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]); 1204 CHKERRQ(PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset)); 1205 for (i=0; i<maxlocal; i++) outranks[i] = rank; 1206 CHKERRQ(PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE)); 1207 CHKERRQ(PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE)); 1208 /* Sort the incoming ranks at each vertex, build the inverse map */ 1209 for (i=0; i<sf->nroots; i++) { 1210 PetscInt j; 1211 for (j=0; j<indegree[i]; j++) tmpoffset[j] = j; 1212 CHKERRQ(PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset)); 1213 for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1214 } 1215 CHKERRQ(PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE)); 1216 CHKERRQ(PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE)); 1217 CHKERRQ(PetscMalloc1(sf->nleaves,&newremote)); 1218 for (i=0; i<sf->nleaves; i++) { 1219 newremote[i].rank = sf->remote[i].rank; 1220 newremote[i].index = newoutoffset[sf->mine ? 
sf->mine[i] : i]; 1221 } 1222 CHKERRQ(PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER)); 1223 CHKERRQ(PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset)); 1224 } 1225 CHKERRQ(PetscFree3(inoffset,outones,outoffset)); 1226 } 1227 *multi = sf->multi; 1228 PetscFunctionReturn(0); 1229 } 1230 1231 /*@C 1232 PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices 1233 1234 Collective 1235 1236 Input Parameters: 1237 + sf - original star forest 1238 . nselected - number of selected roots on this process 1239 - selected - indices of the selected roots on this process 1240 1241 Output Parameter: 1242 . esf - new star forest 1243 1244 Level: advanced 1245 1246 Note: 1247 To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can 1248 be done by calling PetscSFGetGraph(). 1249 1250 .seealso: PetscSFSetGraph(), PetscSFGetGraph() 1251 @*/ 1252 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf) 1253 { 1254 PetscInt i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal; 1255 const PetscInt *ilocal; 1256 signed char *rootdata,*leafdata,*leafmem; 1257 const PetscSFNode *iremote; 1258 PetscSFNode *new_iremote; 1259 MPI_Comm comm; 1260 1261 PetscFunctionBegin; 1262 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1263 PetscSFCheckGraphSet(sf,1); 1264 if (nselected) PetscValidPointer(selected,3); 1265 PetscValidPointer(esf,4); 1266 1267 CHKERRQ(PetscSFSetUp(sf)); 1268 CHKERRQ(PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0)); 1269 CHKERRQ(PetscObjectGetComm((PetscObject)sf,&comm)); 1270 CHKERRQ(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote)); 1271 1272 if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */ 1273 PetscBool dups; 1274 CHKERRQ(PetscCheckDupsInt(nselected,selected,&dups)); 1275 
PetscCheckFalse(dups,comm,PETSC_ERR_ARG_WRONG,"selected[] has dups"); 1276 for (i=0; i<nselected; i++) 1277 PetscCheckFalse(selected[i] < 0 || selected[i] >= nroots,comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")",selected[i],nroots); 1278 } 1279 1280 if (sf->ops->CreateEmbeddedRootSF) { 1281 CHKERRQ((*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf)); 1282 } else { 1283 /* A generic version of creating embedded sf */ 1284 CHKERRQ(PetscSFGetLeafRange(sf,&minleaf,&maxleaf)); 1285 maxlocal = maxleaf - minleaf + 1; 1286 CHKERRQ(PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem)); 1287 leafdata = leafmem - minleaf; 1288 /* Tag selected roots and bcast to leaves */ 1289 for (i=0; i<nselected; i++) rootdata[selected[i]] = 1; 1290 CHKERRQ(PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE)); 1291 CHKERRQ(PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE)); 1292 1293 /* Build esf with leaves that are still connected */ 1294 esf_nleaves = 0; 1295 for (i=0; i<nleaves; i++) { 1296 j = ilocal ? ilocal[i] : i; 1297 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1298 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1299 */ 1300 esf_nleaves += (leafdata[j] ? 1 : 0); 1301 } 1302 CHKERRQ(PetscMalloc1(esf_nleaves,&new_ilocal)); 1303 CHKERRQ(PetscMalloc1(esf_nleaves,&new_iremote)); 1304 for (i=n=0; i<nleaves; i++) { 1305 j = ilocal ? 
ilocal[i] : i; 1306 if (leafdata[j]) { 1307 new_ilocal[n] = j; 1308 new_iremote[n].rank = iremote[i].rank; 1309 new_iremote[n].index = iremote[i].index; 1310 ++n; 1311 } 1312 } 1313 CHKERRQ(PetscSFCreate(comm,esf)); 1314 CHKERRQ(PetscSFSetFromOptions(*esf)); 1315 CHKERRQ(PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER)); 1316 CHKERRQ(PetscFree2(rootdata,leafmem)); 1317 } 1318 CHKERRQ(PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0)); 1319 PetscFunctionReturn(0); 1320 } 1321 1322 /*@C 1323 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1324 1325 Collective 1326 1327 Input Parameters: 1328 + sf - original star forest 1329 . nselected - number of selected leaves on this process 1330 - selected - indices of the selected leaves on this process 1331 1332 Output Parameter: 1333 . newsf - new star forest 1334 1335 Level: advanced 1336 1337 .seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph() 1338 @*/ 1339 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1340 { 1341 const PetscSFNode *iremote; 1342 PetscSFNode *new_iremote; 1343 const PetscInt *ilocal; 1344 PetscInt i,nroots,*leaves,*new_ilocal; 1345 MPI_Comm comm; 1346 1347 PetscFunctionBegin; 1348 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1349 PetscSFCheckGraphSet(sf,1); 1350 if (nselected) PetscValidPointer(selected,3); 1351 PetscValidPointer(newsf,4); 1352 1353 /* Uniq selected[] and put results in leaves[] */ 1354 CHKERRQ(PetscObjectGetComm((PetscObject)sf,&comm)); 1355 CHKERRQ(PetscMalloc1(nselected,&leaves)); 1356 CHKERRQ(PetscArraycpy(leaves,selected,nselected)); 1357 CHKERRQ(PetscSortedRemoveDupsInt(&nselected,leaves)); 1358 PetscCheckFalse(nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves),comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT 
")",leaves[0],leaves[nselected-1],sf->nleaves); 1359 1360 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1361 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) { 1362 CHKERRQ((*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf)); 1363 } else { 1364 CHKERRQ(PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote)); 1365 CHKERRQ(PetscMalloc1(nselected,&new_ilocal)); 1366 CHKERRQ(PetscMalloc1(nselected,&new_iremote)); 1367 for (i=0; i<nselected; ++i) { 1368 const PetscInt l = leaves[i]; 1369 new_ilocal[i] = ilocal ? ilocal[l] : l; 1370 new_iremote[i].rank = iremote[l].rank; 1371 new_iremote[i].index = iremote[l].index; 1372 } 1373 CHKERRQ(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf)); 1374 CHKERRQ(PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER)); 1375 } 1376 CHKERRQ(PetscFree(leaves)); 1377 PetscFunctionReturn(0); 1378 } 1379 1380 /*@C 1381 PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd() 1382 1383 Collective on PetscSF 1384 1385 Input Parameters: 1386 + sf - star forest on which to communicate 1387 . unit - data type associated with each node 1388 . rootdata - buffer to broadcast 1389 - op - operation to use for reduction 1390 1391 Output Parameter: 1392 . leafdata - buffer to be reduced with values from each leaf's respective root 1393 1394 Level: intermediate 1395 1396 Notes: 1397 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1398 are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1399 use PetscSFBcastWithMemTypeBegin() instead. 
1400 .seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin() 1401 @*/ 1402 PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1403 { 1404 PetscMemType rootmtype,leafmtype; 1405 1406 PetscFunctionBegin; 1407 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1408 CHKERRQ(PetscSFSetUp(sf)); 1409 if (!sf->vscat.logging) CHKERRQ(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0)); 1410 CHKERRQ(PetscGetMemType(rootdata,&rootmtype)); 1411 CHKERRQ(PetscGetMemType(leafdata,&leafmtype)); 1412 CHKERRQ((*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op)); 1413 if (!sf->vscat.logging) CHKERRQ(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0)); 1414 PetscFunctionReturn(0); 1415 } 1416 1417 /*@C 1418 PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd() 1419 1420 Collective on PetscSF 1421 1422 Input Parameters: 1423 + sf - star forest on which to communicate 1424 . unit - data type associated with each node 1425 . rootmtype - memory type of rootdata 1426 . rootdata - buffer to broadcast 1427 . leafmtype - memory type of leafdata 1428 - op - operation to use for reduction 1429 1430 Output Parameter: 1431 . 
leafdata - buffer to be reduced with values from each leaf's respective root 1432 1433 Level: intermediate 1434 1435 .seealso: PetscSFBcastEnd(), PetscSFBcastBegin() 1436 @*/ 1437 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 1438 { 1439 PetscFunctionBegin; 1440 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1441 CHKERRQ(PetscSFSetUp(sf)); 1442 if (!sf->vscat.logging) CHKERRQ(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0)); 1443 CHKERRQ((*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op)); 1444 if (!sf->vscat.logging) CHKERRQ(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0)); 1445 PetscFunctionReturn(0); 1446 } 1447 1448 /*@C 1449 PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin() 1450 1451 Collective 1452 1453 Input Parameters: 1454 + sf - star forest 1455 . unit - data type 1456 . rootdata - buffer to broadcast 1457 - op - operation to use for reduction 1458 1459 Output Parameter: 1460 . leafdata - buffer to be reduced with values from each leaf's respective root 1461 1462 Level: intermediate 1463 1464 .seealso: PetscSFSetGraph(), PetscSFReduceEnd() 1465 @*/ 1466 PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1467 { 1468 PetscFunctionBegin; 1469 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1470 if (!sf->vscat.logging) CHKERRQ(PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0)); 1471 CHKERRQ((*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op)); 1472 if (!sf->vscat.logging) CHKERRQ(PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0)); 1473 PetscFunctionReturn(0); 1474 } 1475 1476 /*@C 1477 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 1478 1479 Collective 1480 1481 Input Parameters: 1482 + sf - star forest 1483 . unit - data type 1484 . 
leafdata - values to reduce
-  op - reduction operation

   Output Parameter:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

   Notes:
   When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
   are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should
   use PetscSFReduceWithMemTypeBegin() instead.

.seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
@*/
PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscMemType rmtype,lmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  CHKERRQ(PetscSFSetUp(sf));
  /* Skip event logging when driven by VecScatter, which logs its own events */
  if (!sf->vscat.logging) CHKERRQ(PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0));
  /* Autodetect whether the buffers live on host or device */
  CHKERRQ(PetscGetMemType(rootdata,&rmtype));
  CHKERRQ(PetscGetMemType(leafdata,&lmtype));
  CHKERRQ((*sf->ops->ReduceBegin)(sf,unit,lmtype,leafdata,rmtype,rootdata,op));
  if (!sf->vscat.logging) CHKERRQ(PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafmtype - memory type of leafdata
.  leafdata - values to reduce
.  rootmtype - memory type of rootdata
-  op - reduction operation

   Output Parameter:
.
rootdata - result of reduction of values from all leaves of each root 1529 1530 Level: intermediate 1531 1532 .seealso: PetscSFBcastBegin(), PetscSFReduceBegin() 1533 @*/ 1534 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 1535 { 1536 PetscFunctionBegin; 1537 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1538 CHKERRQ(PetscSFSetUp(sf)); 1539 if (!sf->vscat.logging) CHKERRQ(PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0)); 1540 CHKERRQ((sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op)); 1541 if (!sf->vscat.logging) CHKERRQ(PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0)); 1542 PetscFunctionReturn(0); 1543 } 1544 1545 /*@C 1546 PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin() 1547 1548 Collective 1549 1550 Input Parameters: 1551 + sf - star forest 1552 . unit - data type 1553 . leafdata - values to reduce 1554 - op - reduction operation 1555 1556 Output Parameter: 1557 . rootdata - result of reduction of values from all leaves of each root 1558 1559 Level: intermediate 1560 1561 .seealso: PetscSFSetGraph(), PetscSFBcastEnd() 1562 @*/ 1563 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1564 { 1565 PetscFunctionBegin; 1566 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1567 if (!sf->vscat.logging) CHKERRQ(PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0)); 1568 CHKERRQ((*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op)); 1569 if (!sf->vscat.logging) CHKERRQ(PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0)); 1570 PetscFunctionReturn(0); 1571 } 1572 1573 /*@C 1574 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd() 1575 1576 Collective 1577 1578 Input Parameters: 1579 + sf - star forest 1580 . unit - data type 1581 . 
leafdata - leaf values to use in reduction 1582 - op - operation to use for reduction 1583 1584 Output Parameters: 1585 + rootdata - root values to be updated, input state is seen by first process to perform an update 1586 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1587 1588 Level: advanced 1589 1590 Note: 1591 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1592 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1593 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1594 integers. 1595 1596 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph() 1597 @*/ 1598 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1599 { 1600 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1601 1602 PetscFunctionBegin; 1603 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1604 CHKERRQ(PetscSFSetUp(sf)); 1605 CHKERRQ(PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0)); 1606 CHKERRQ(PetscGetMemType(rootdata,&rootmtype)); 1607 CHKERRQ(PetscGetMemType(leafdata,&leafmtype)); 1608 CHKERRQ(PetscGetMemType(leafupdate,&leafupdatemtype)); 1609 PetscCheckFalse(leafmtype != leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1610 CHKERRQ((*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op)); 1611 CHKERRQ(PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0)); 1612 PetscFunctionReturn(0); 1613 } 1614 1615 /*@C 1616 PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd() 1617 1618 Collective 1619 
1620 Input Parameters: 1621 + sf - star forest 1622 . unit - data type 1623 . rootmtype - memory type of rootdata 1624 . leafmtype - memory type of leafdata 1625 . leafdata - leaf values to use in reduction 1626 . leafupdatemtype - memory type of leafupdate 1627 - op - operation to use for reduction 1628 1629 Output Parameters: 1630 + rootdata - root values to be updated, input state is seen by first process to perform an update 1631 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1632 1633 Level: advanced 1634 1635 Note: See PetscSFFetchAndOpBegin() for more details. 1636 1637 .seealso: PetscSFFetchAndOpBegin(),PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph() 1638 @*/ 1639 PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscMemType leafupdatemtype,void *leafupdate,MPI_Op op) 1640 { 1641 PetscFunctionBegin; 1642 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1643 CHKERRQ(PetscSFSetUp(sf)); 1644 CHKERRQ(PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0)); 1645 PetscCheckFalse(leafmtype != leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1646 CHKERRQ((*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op)); 1647 CHKERRQ(PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0)); 1648 PetscFunctionReturn(0); 1649 } 1650 1651 /*@C 1652 PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value 1653 1654 Collective 1655 1656 Input Parameters: 1657 + sf - star forest 1658 . unit - data type 1659 . 
 leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  CHKERRQ(PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0));
  /* Completes the communication started by the matching FetchAndOpBegin; the memory types were resolved there */
  CHKERRQ((*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op));
  CHKERRQ(PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    PetscCheckFalse(sf->degree,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf-sf->minleaf+1; /* size of the local leaf index range */
    CHKERRQ(PetscMalloc1(nroots,&sf->degree));
    CHKERRQ(PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* Summing a 1 from every leaf into its root yields the root degree; degreetmp-minleaf shifts the buffer so it is indexed by leaf id */
    CHKERRQ(PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM));
  }
  /* Result is only available after the matching PetscSFComputeDegreeEnd() */
  *degree = NULL;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscCheckFalse(!sf->degreetmp,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    /* Finish the leaf-to-root summation started in PetscSFComputeDegreeBegin(); the -minleaf shift mirrors the one used there */
    CHKERRQ(PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM));
    CHKERRQ(PetscFree(sf->degreetmp));
    /* sf->degree is cached; subsequent Begin/End pairs return it without recomputation */
    sf->degreeknown = PETSC_TRUE;
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
   Each multi-root is assigned index of the corresponding original root.

   Collective

   Input Parameters:
+  sf - star forest
-  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

   Output Parameters:
+  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
-  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

   Level: developer

   Notes:
   The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.

.seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
@*/
PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
{
  PetscSF  msf;
  PetscInt i, j, k, nroots, nmroots;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  CHKERRQ(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  if (nroots) PetscValidIntPointer(degree,2); /* degree may be NULL when there are no local roots */
  if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
  PetscValidPointer(multiRootsOrigNumbering,4);
  CHKERRQ(PetscSFGetMultiSF(sf,&msf));
  CHKERRQ(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
  CHKERRQ(PetscMalloc1(nmroots, multiRootsOrigNumbering));
  /* Original root i with degree[i] > 0 owns degree[i] consecutive multi-roots; record i once per multi-root */
  for (i=0,j=0,k=0; i<nroots; i++) {
    if (!degree[i]) continue;
    for (j=0; j<degree[i]; j++,k++) {
      (*multiRootsOrigNumbering)[k] = i;
    }
  }
  /* The degrees must account for every multi-root exactly once */
  PetscCheckFalse(k != nmroots,PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
  if (nMultiRoots) *nMultiRoots = nmroots;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Parameter:
.
 multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  CHKERRQ(PetscSFSetUp(sf));
  /* Gather is implemented as a reduce with MPI_REPLACE over the multi-SF, whose roots are expanded by degree,
     so each leaf lands in its own dedicated slot and no values are combined */
  CHKERRQ(PetscSFGetMultiSF(sf,&multi));
  CHKERRQ(PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Parameter:
.  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  CHKERRQ(PetscSFGetMultiSF(sf,&multi));
  CHKERRQ(PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Parameter:
.
 leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  CHKERRQ(PetscSFSetUp(sf));
  /* Scatter is a broadcast with MPI_REPLACE over the multi-SF: each leaf receives the entry of multirootdata reserved for it */
  CHKERRQ(PetscSFGetMultiSF(sf,&multi));
  CHKERRQ(PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE));
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Parameter:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  CHKERRQ(PetscSFGetMultiSF(sf,&multi));
  CHKERRQ(PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE));
  PetscFunctionReturn(0);
}

/* In debug mode, verify that no leaf index appears more than once in sf's graph (i.e. sf is a true star forest) */
static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
{
  PetscInt        i, n, nleaves;
  const PetscInt *ilocal = NULL;
  PetscHSetI      seen;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) {
    CHKERRQ(PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL));
    CHKERRQ(PetscHSetICreate(&seen));
    for (i = 0; i < nleaves; i++) {
      const PetscInt leaf = ilocal ? ilocal[i] : i; /* ilocal == NULL means contiguous leaves 0..nleaves-1 */
      CHKERRQ(PetscHSetIAdd(seen,leaf));
    }
    /* A duplicate leaf makes the set strictly smaller than the leaf count */
    CHKERRQ(PetscHSetIGetSize(seen,&n));
    PetscCheckFalse(n != nleaves,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
    CHKERRQ(PetscHSetIDestroy(&seen));
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

   Input Parameters:
+  sfA - The first PetscSF
-  sfB - The second PetscSF

   Output Parameters:
.  sfBA - The composite SF

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e. the same leaf is not connected with different roots.

   sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
   Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  CHKERRQ(PetscSFCheckLeavesUnique_Private(sfA));
  CHKERRQ(PetscSFCheckLeavesUnique_Private(sfB));

  CHKERRQ(PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA));
  CHKERRQ(PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB));
  if (localPointsA) {
    /* Re-index sfA's remote roots by sfA leaf id (which doubles as sfB root id); rank = -1 marks "no incoming edge" */
    CHKERRQ(PetscMalloc1(numRootsB,&reorderedRemotePointsA));
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* sfA leaf outside sfB's root space cannot be on a composed path */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  CHKERRQ(PetscSFGetLeafRange(sfB,&minleaf,&maxleaf));
  CHKERRQ(PetscMalloc1(maxleaf-minleaf+1,&leafdataB));
  /* Broadcast sfA's root info down through sfB: each sfB leaf learns its ultimate sfA root (a PetscSFNode, hence MPIU_2INT) */
  CHKERRQ(PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE));
  CHKERRQ(PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE));
  CHKERRQ(PetscFree(reorderedRemotePointsA));

  /* Count connected leaves; if sfB's leaves are contiguous and all connected, leafdataB can be handed to the new SF as-is */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership of leafdataB passes to the new SF below */
  } else {
    CHKERRQ(PetscMalloc1(numLeavesBA,&localPointsBA));
    CHKERRQ(PetscMalloc1(numLeavesBA,&remotePointsBA));
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue; /* drop unconnected leaves */
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA]  = l;
      numLeavesBA++;
    }
    CHKERRQ(PetscFree(leafdataB));
  }
  CHKERRQ(PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA));
  CHKERRQ(PetscSFSetFromOptions(*sfBA));
  CHKERRQ(PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER));
  PetscFunctionReturn(0);
}

/*@
   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

   Input Parameters:
+  sfA - The first PetscSF
-  sfB - The second PetscSF

   Output Parameters:
.  sfBA - The composite SF.

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
   a Reduce on sfB, on connected roots.

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op            op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool         iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  CHKERRQ(PetscSFCheckLeavesUnique_Private(sfA));
  CHKERRQ(PetscSFCheckLeavesUnique_Private(sfB));

  CHKERRQ(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  CHKERRQ(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  CHKERRQ(PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin));
  if (iswin) op = MPI_REPLACE;
#endif

  CHKERRQ(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  /* Build an array over sfB's leaf range holding sfA's remote root for each shared leaf; rank = -1 marks "not a leaf of sfA" */
  CHKERRQ(PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA));
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; /* sfA leaf outside sfB's leaf range: no path */
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* contiguous sfA leaves: leaf id is just the array index */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  CHKERRQ(PetscMalloc1(numRootsB,&localPointsBA));
  CHKERRQ(PetscMalloc1(numRootsB,&remotePointsBA));
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Reduce up sfB (i.e. traverse its inverse): each sfB root learns the sfA root connected to its (single) leaf */
  CHKERRQ(PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op));
  CHKERRQ(PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op));
  CHKERRQ(PetscFree(reorderedRemotePointsA));
  /* Compact in place: keep only the sfB roots that received a connection */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  CHKERRQ(PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA));
  CHKERRQ(PetscSFSetFromOptions(*sfBA));
  CHKERRQ(PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER));
  PetscFunctionReturn(0);
}

/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
. out - The local PetscSF
 */
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm           comm;
  PetscMPIInt        myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode       *liremote;
  PetscSF            lsf;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->CreateLocalSF) {
    /* The implementation may provide a faster specialized version */
    CHKERRQ((*sf->ops->CreateLocalSF)(sf,out));
  } else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    CHKERRQ(PetscObjectGetComm((PetscObject)sf,&comm));
    CHKERRMPI(MPI_Comm_rank(comm,&myrank));

    /* Find out local edges and build a local SF */
    CHKERRQ(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote));
    /* First pass: count leaves whose root lives on this rank */
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    CHKERRQ(PetscMalloc1(lnleaves,&lilocal));
    CHKERRQ(PetscMalloc1(lnleaves,&liremote));

    /* Second pass: copy the intra-process edges */
    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    CHKERRQ(PetscSFCreate(PETSC_COMM_SELF,&lsf));
    CHKERRQ(PetscSFSetFromOptions(lsf));
    CHKERRQ(PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER));
    CHKERRQ(PetscSFSetUp(lsf));
    *out = lsf;
  }
  PetscFunctionReturn(0);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscMemType rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  CHKERRQ(PetscSFSetUp(sf));
  CHKERRQ(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0));
  CHKERRQ(PetscGetMemType(rootdata,&rootmtype));
  CHKERRQ(PetscGetMemType(leafdata,&leafmtype));
  if (sf->ops->BcastToZero) {
    CHKERRQ((*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata));
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  CHKERRQ(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0));
  PetscFunctionReturn(0);
}

/*@
   PetscSFConcatenate - concatenate multiple SFs into one

   Input Parameters:
+  comm - the communicator
.  nsfs - the number of input PetscSF
.  sfs - the array of input PetscSF
.  shareRoots - the flag whether roots of input PetscSFs are taken as shared (PETSC_TRUE), or separate and concatenated (PETSC_FALSE)
-  leafOffsets - the array of local leaf offsets, one for each input PetscSF, or NULL for contiguous storage

   Output Parameters:
.  newsf - The resulting PetscSF

   Level: developer

   Notes:
   The communicator of all SFs in sfs must be comm.

   The offsets in leafOffsets are added to the original leaf indices.

   If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well.
   In this case, NULL is also passed as ilocal to the resulting SF.

   If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs.
   In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs).

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscBool shareRoots, PetscInt leafOffsets[], PetscSF *newsf)
{
  PetscInt     i, s, nLeaves, nRoots;
  PetscInt    *leafArrayOffsets;
  PetscInt    *ilocal_new;
  PetscSFNode *iremote_new;
  PetscInt    *rootOffsets;
  PetscBool    all_ilocal_null = PETSC_FALSE;
  PetscMPIInt  rank;

  PetscFunctionBegin;
  {
    PetscSF dummy; /* just to have a PetscObject on comm for input validation */

    CHKERRQ(PetscSFCreate(comm,&dummy));
    PetscValidLogicalCollectiveInt(dummy,nsfs,2);
    PetscValidPointer(sfs,3);
    for (i=0; i<nsfs; i++) {
      PetscValidHeaderSpecific(sfs[i],PETSCSF_CLASSID,3);
      PetscCheckSameComm(dummy,1,sfs[i],3);
    }
    PetscValidLogicalCollectiveBool(dummy,shareRoots,4);
    if (leafOffsets) PetscValidIntPointer(leafOffsets,5);
    PetscValidPointer(newsf,6);
    CHKERRQ(PetscSFDestroy(&dummy));
  }
  if (!nsfs) {
    /* No inputs: return an empty SF */
    CHKERRQ(PetscSFCreate(comm, newsf));
    CHKERRQ(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscFunctionReturn(0);
  }
  CHKERRMPI(MPI_Comm_rank(comm, &rank));

  /* Determine the root space: either shared by all inputs (offsets stay 0) or stacked with cumulative per-SF offsets */
  CHKERRQ(PetscCalloc1(nsfs+1, &rootOffsets));
  if (shareRoots) {
    CHKERRQ(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
    if (PetscDefined(USE_DEBUG)) {
      /* shared roots only make sense if every input agrees on the root count */
      for (s=1; s<nsfs; s++) {
        PetscInt nr;

        CHKERRQ(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
        PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "shareRoots = PETSC_TRUE but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", s, nr, nRoots);
      }
    }
  } else {
    for (s=0; s<nsfs; s++) {
      PetscInt nr;

      CHKERRQ(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
      rootOffsets[s+1] = rootOffsets[s] + nr;
    }
    nRoots = rootOffsets[nsfs];
  }

  /* Calculate leaf array offsets and automatic root offsets */
  CHKERRQ(PetscMalloc1(nsfs+1,&leafArrayOffsets));
  leafArrayOffsets[0] = 0;
  for (s=0; s<nsfs; s++) {
    PetscInt nl;

    CHKERRQ(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
    leafArrayOffsets[s+1] = leafArrayOffsets[s] + nl;
  }
  nLeaves = leafArrayOffsets[nsfs];

  /* leafOffsets == NULL is only legal when every input SF has contiguous leaves (ilocal == NULL) */
  if (!leafOffsets) {
    all_ilocal_null = PETSC_TRUE;
    for (s=0; s<nsfs; s++) {
      const PetscInt *ilocal;

      CHKERRQ(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
      if (ilocal) {
        all_ilocal_null = PETSC_FALSE;
        break;
      }
    }
    PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
  }

  /* Renumber and concatenate local leaves */
  ilocal_new = NULL;
  if (!all_ilocal_null) {
    CHKERRQ(PetscMalloc1(nLeaves, &ilocal_new));
    for (i = 0; i<nLeaves; i++) ilocal_new[i] = -1;
    for (s = 0; s<nsfs; s++) {
      const PetscInt *ilocal;
      PetscInt       *ilocal_l = &ilocal_new[leafArrayOffsets[s]];
      PetscInt        i, nleaves_l;

      CHKERRQ(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
      /* shift each input SF's leaves by the caller-provided offset to keep them disjoint */
      for (i=0; i<nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
    }
  }

  /* Renumber and concatenate remote roots */
  CHKERRQ(PetscMalloc1(nLeaves, &iremote_new));
  for (i = 0; i < nLeaves; i++) {
    iremote_new[i].rank = -1;
    iremote_new[i].index = -1;
  }
  for (s = 0; s<nsfs; s++) {
    PetscInt           i, nl, nr;
    PetscSF            tmp_sf;
    const PetscSFNode *iremote;
    PetscSFNode       *tmp_rootdata;
    PetscSFNode       *tmp_leafdata = &iremote_new[leafArrayOffsets[s]];

    CHKERRQ(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
    CHKERRQ(PetscSFCreate(comm, &tmp_sf));
    /* create helper SF with contiguous leaves */
    CHKERRQ(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode*) iremote, PETSC_COPY_VALUES));
    CHKERRQ(PetscSFSetUp(tmp_sf));
    CHKERRQ(PetscMalloc1(nr, &tmp_rootdata));
    /* each root broadcasts its offset global index and owning rank down to its leaves */
    for (i = 0; i < nr; i++) {
      tmp_rootdata[i].index = i + rootOffsets[s];
      tmp_rootdata[i].rank = (PetscInt) rank;
    }
    CHKERRQ(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
    CHKERRQ(PetscSFBcastEnd( tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
    CHKERRQ(PetscSFDestroy(&tmp_sf));
    CHKERRQ(PetscFree(tmp_rootdata));
  }

  /* Build the new SF */
  CHKERRQ(PetscSFCreate(comm, newsf));
  CHKERRQ(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
  CHKERRQ(PetscSFSetUp(*newsf));
  CHKERRQ(PetscFree(rootOffsets));
  CHKERRQ(PetscFree(leafArrayOffsets));
  PetscFunctionReturn(0);
}