#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/hashseti.h>
#include <petsc/private/viewerimpl.h>
#include <petscctable.h>

#if defined(PETSC_HAVE_CUDA)
#include <cuda_runtime.h>
#endif

#if defined(PETSC_HAVE_HIP)
#include <hip/hip_runtime.h>
#endif

#if defined(PETSC_CLANG_STATIC_ANALYZER)
/* Give the static analyzer a real prototype instead of the macro below */
void PetscSFCheckGraphSet(PetscSF,int);
#else
#if defined(PETSC_USE_DEBUG)
/* Raise an error if the graph of the SF argument has not been set; compiled out in optimized builds */
# define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                             \
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
# define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif
#endif

const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};

/*@
   PetscSFCreate - create a star forest communication context

   Collective

   Input Parameter:
.  comm - communicator on which the star forest will operate

   Output Parameter:
.  sf - new star forest context

   Options Database Keys:
+  -sf_type basic    -Use MPI persistent Isend/Irecv for communication (Default)
.  -sf_type window   -Use MPI-3 one-sided window for communication
-  -sf_type neighbor -Use MPI-3 neighborhood collectives for communication

   Level: intermediate

   Notes:
   When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
   MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
   SFs are optimized and they have better performance than general SFs.
.seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
{
  PetscErrorCode ierr;
  PetscSF        b;

  PetscFunctionBegin;
  PetscValidPointer(sf,2);
  ierr = PetscSFInitializePackage();CHKERRQ(ierr);

  ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);

  /* Negative counts mean "graph not set yet"; extremal min/max sentinels shrink correctly on first set */
  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_MAX_INT;
  b->maxleaf   = PETSC_MIN_INT;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;
#if defined(PETSC_HAVE_DEVICE)
  b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
  b->use_stream_aware_mpi = PETSC_FALSE;
  b->unknown_input_stream= PETSC_FALSE;
#if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
  b->backend = PETSCSF_BACKEND_KOKKOS;
#elif defined(PETSC_HAVE_CUDA)
  b->backend = PETSCSF_BACKEND_CUDA;
#elif defined(PETSC_HAVE_HIP)
  b->backend = PETSCSF_BACKEND_HIP;
#endif

#if defined(PETSC_HAVE_NVSHMEM)
  b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
  b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
  ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL);CHKERRQ(ierr);
#endif
#endif
  b->vscat.from_n = -1;
  b->vscat.to_n   = -1;
  b->vscat.unit   = MPIU_SCALAR;
  *sf = b;
  PetscFunctionReturn(0);
}

/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Parameter:
.
  sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Give the implementation a chance to release its own data first */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  /* Return the graph-description fields to their PetscSFCreate() defaults */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRMPI(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRMPI(ierr);}
  /* Break the multi-SF's back-reference to us before destroying it, to avoid a dangling pointer */
  if (sf->multi) sf->multi->multi = NULL;
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);

#if defined(PETSC_HAVE_DEVICE)
  for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);CHKERRQ(ierr);}
#endif

  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFSetType - Set the PetscSF communication implementation

   Collective on PetscSF

   Input Parameters:
+  sf - the PetscSF context
-  type - a known method

   Options Database Key:
.
  -sf_type <type> - Sets the method; use -help for a list
   of available methods (for instance, window, basic, neighbor)

   Notes:
   See "include/petscsf.h" for available methods (for instance)
+    PETSCSFWINDOW - MPI-2/3 one-sided
-    PETSCSFBASIC - basic implementation using MPI-1 two-sided

   Level: intermediate

.seealso: PetscSFType, PetscSFCreate()
@*/
PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
{
  PetscErrorCode ierr,(*r)(PetscSF);
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidCharPointer(type,2);

  /* Nothing to do if the object already has the requested type */
  ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
  if (match) PetscFunctionReturn(0);

  ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
  PetscCheckFalse(!r,PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
  /* Destroy the previous PetscSF implementation context */
  if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
  /* Clear the ops table so the constructor (*r) only has to fill in what it supports */
  ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
  ierr = (*r)(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetType - Get the PetscSF communication implementation

   Not Collective

   Input Parameter:
.  sf - the PetscSF context

   Output Parameter:
.  type - the PetscSF type name

   Level: intermediate

.seealso: PetscSFSetType(), PetscSFCreate()
@*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
  PetscValidPointer(type,2);
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFDestroy - destroy star forest

   Collective

   Input Parameter:
.
sf - address of star forest 221 222 Level: intermediate 223 224 .seealso: PetscSFCreate(), PetscSFReset() 225 @*/ 226 PetscErrorCode PetscSFDestroy(PetscSF *sf) 227 { 228 PetscErrorCode ierr; 229 230 PetscFunctionBegin; 231 if (!*sf) PetscFunctionReturn(0); 232 PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1); 233 if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);} 234 ierr = PetscSFReset(*sf);CHKERRQ(ierr); 235 if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);} 236 ierr = PetscSFDestroy(&(*sf)->vscat.lsf);CHKERRQ(ierr); 237 if ((*sf)->vscat.bs > 1) {ierr = MPI_Type_free(&(*sf)->vscat.unit);CHKERRMPI(ierr);} 238 ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr); 239 PetscFunctionReturn(0); 240 } 241 242 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 243 { 244 PetscInt i, nleaves; 245 PetscMPIInt size; 246 const PetscInt *ilocal; 247 const PetscSFNode *iremote; 248 PetscErrorCode ierr; 249 250 PetscFunctionBegin; 251 if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0); 252 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 253 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr); 254 for (i = 0; i < nleaves; i++) { 255 const PetscInt rank = iremote[i].rank; 256 const PetscInt remote = iremote[i].index; 257 const PetscInt leaf = ilocal ? 
ilocal[i] : i; 258 PetscCheckFalse(rank < 0 || rank >= size,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)",rank,i,size); 259 PetscCheckFalse(remote < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0",remote,i); 260 PetscCheckFalse(leaf < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0",leaf,i); 261 } 262 PetscFunctionReturn(0); 263 } 264 265 /*@ 266 PetscSFSetUp - set up communication structures 267 268 Collective 269 270 Input Parameter: 271 . sf - star forest communication object 272 273 Level: beginner 274 275 .seealso: PetscSFSetFromOptions(), PetscSFSetType() 276 @*/ 277 PetscErrorCode PetscSFSetUp(PetscSF sf) 278 { 279 PetscErrorCode ierr; 280 281 PetscFunctionBegin; 282 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 283 PetscSFCheckGraphSet(sf,1); 284 if (sf->setupcalled) PetscFunctionReturn(0); 285 ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 286 ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr); 287 if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} /* Zero all sf->ops */ 288 if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);} 289 #if defined(PETSC_HAVE_CUDA) 290 if (sf->backend == PETSCSF_BACKEND_CUDA) { 291 sf->ops->Malloc = PetscSFMalloc_CUDA; 292 sf->ops->Free = PetscSFFree_CUDA; 293 } 294 #endif 295 #if defined(PETSC_HAVE_HIP) 296 if (sf->backend == PETSCSF_BACKEND_HIP) { 297 sf->ops->Malloc = PetscSFMalloc_HIP; 298 sf->ops->Free = PetscSFFree_HIP; 299 } 300 #endif 301 302 # 303 #if defined(PETSC_HAVE_KOKKOS) 304 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 305 sf->ops->Malloc = PetscSFMalloc_Kokkos; 306 sf->ops->Free = PetscSFFree_Kokkos; 307 } 308 #endif 309 ierr = 
PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 310 sf->setupcalled = PETSC_TRUE; 311 PetscFunctionReturn(0); 312 } 313 314 /*@ 315 PetscSFSetFromOptions - set PetscSF options using the options database 316 317 Logically Collective 318 319 Input Parameter: 320 . sf - star forest 321 322 Options Database Keys: 323 + -sf_type - implementation type, see PetscSFSetType() 324 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 325 . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also 326 use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true). 327 If true, this option only works with -use_gpu_aware_mpi 1. 328 . -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false). 329 If true, this option only works with -use_gpu_aware_mpi 1. 330 331 - -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos. 332 On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices, 333 the only available is kokkos. 334 335 Level: intermediate 336 @*/ 337 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 338 { 339 PetscSFType deft; 340 char type[256]; 341 PetscErrorCode ierr; 342 PetscBool flg; 343 344 PetscFunctionBegin; 345 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 346 ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr); 347 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 348 ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr); 349 ierr = PetscSFSetType(sf,flg ? 
type : deft);CHKERRQ(ierr); 350 ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr); 351 #if defined(PETSC_HAVE_DEVICE) 352 { 353 char backendstr[32] = {0}; 354 PetscBool isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set; 355 /* Change the defaults set in PetscSFCreate() with command line options */ 356 ierr = PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL);CHKERRQ(ierr); 357 ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr); 358 ierr = PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);CHKERRQ(ierr); 359 ierr = PetscStrcasecmp("cuda",backendstr,&isCuda);CHKERRQ(ierr); 360 ierr = PetscStrcasecmp("kokkos",backendstr,&isKokkos);CHKERRQ(ierr); 361 ierr = PetscStrcasecmp("hip",backendstr,&isHip);CHKERRQ(ierr); 362 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 363 if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 364 else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 365 else if (isHip) sf->backend = PETSCSF_BACKEND_HIP; 366 else PetscCheckFalse(set,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr); 367 #elif defined(PETSC_HAVE_KOKKOS) 368 PetscCheckFalse(set && !isKokkos,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. 
You can only choose kokkos", backendstr); 369 #endif 370 } 371 #endif 372 if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);} 373 ierr = PetscOptionsEnd();CHKERRQ(ierr); 374 PetscFunctionReturn(0); 375 } 376 377 /*@ 378 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 379 380 Logically Collective 381 382 Input Parameters: 383 + sf - star forest 384 - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic) 385 386 Level: advanced 387 388 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin() 389 @*/ 390 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg) 391 { 392 PetscFunctionBegin; 393 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 394 PetscValidLogicalCollectiveBool(sf,flg,2); 395 PetscCheckFalse(sf->multi,PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 396 sf->rankorder = flg; 397 PetscFunctionReturn(0); 398 } 399 400 /*@C 401 PetscSFSetGraph - Set a parallel star forest 402 403 Collective 404 405 Input Parameters: 406 + sf - star forest 407 . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 408 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 409 . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced 410 during setup in debug mode) 411 . localmode - copy mode for ilocal 412 . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 413 during setup in debug mode) 414 - remotemode - copy mode for iremote 415 416 Level: intermediate 417 418 Notes: 419 Leaf indices in ilocal must be unique, otherwise an error occurs. 
   Input arrays ilocal and iremote follow the PetscCopyMode semantics.
   In particular, if localmode/remotemode is PETSC_OWN_POINTER or PETSC_USE_POINTER,
   PETSc might modify the respective array;
   if PETSC_USE_POINTER, the user must delete the array after PetscSFDestroy().

   Fortran Notes:
   In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode.

   Developer Notes:
   We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
   This also allows to compare leaf sets of two SFs easily.

.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,PetscInt *ilocal,PetscCopyMode localmode,PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscBool      unique, contiguous;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  PetscCheckFalse(nroots < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %" PetscInt_FMT ", cannot be negative",nroots);
  PetscCheckFalse(nleaves < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %" PetscInt_FMT ", cannot be negative",nleaves);
  PetscCheck(localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong localmode %d",localmode);
  PetscCheck(remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong remotemode %d",remotemode);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    ierr = PetscSFReset(sf);CHKERRQ(ierr);
  }

  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  /* With PETSC_COPY_VALUES we work on private copies; from here on ilocal/iremote are ours to modify */
  if (localmode == PETSC_COPY_VALUES && ilocal) {
    PetscInt *tlocal = NULL;

    ierr   = PetscMalloc1(nleaves,&tlocal);CHKERRQ(ierr);
    ierr   = PetscArraycpy(tlocal,ilocal,nleaves);CHKERRQ(ierr);
    ilocal = tlocal;
  }
  if (remotemode == PETSC_COPY_VALUES) {
    PetscSFNode *tremote = NULL;

    ierr    = PetscMalloc1(nleaves,&tremote);CHKERRQ(ierr);
    ierr    = PetscArraycpy(tremote,iremote,nleaves);CHKERRQ(ierr);
    iremote = tremote;
  }

  if (nleaves && ilocal) {
    PetscSFNode work;

    /* Sort leaves (keeping iremote in step) to detect duplicates/contiguity and get min/max cheaply */
    ierr   = PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work);CHKERRQ(ierr);
    ierr   = PetscSortedCheckDupsInt(nleaves, ilocal, &unique);CHKERRQ(ierr);
    unique = PetscNot(unique); /* PetscSortedCheckDupsInt returns "has dups"; we want "is unique" */
    PetscCheck(sf->allow_multi_leaves || unique,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Input ilocal has duplicate entries which is not allowed for this PetscSF");
    sf->minleaf = ilocal[0];
    sf->maxleaf = ilocal[nleaves-1];
    contiguous  = (PetscBool) (unique && ilocal[0] == 0 && ilocal[nleaves-1] == nleaves-1);
  } else {
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
    unique      = PETSC_TRUE;
    contiguous  = PETSC_TRUE;
  }

  /* A contiguous 0..nleaves-1 leaf set is stored as ilocal == NULL */
  if (contiguous) {
    if (localmode == PETSC_USE_POINTER) {
      ilocal = NULL;
    } else {
      ierr = PetscFree(ilocal);CHKERRQ(ierr);
    }
  }
  sf->mine = ilocal;
  if (localmode == PETSC_USE_POINTER) {
    sf->mine_alloc = NULL; /* we do not own the user's array */
  } else {
    sf->mine_alloc = ilocal;
  }
  sf->remote = iremote;
  if (remotemode == PETSC_USE_POINTER) {
    sf->remote_alloc = NULL;
  } else {
    sf->remote_alloc = iremote;
  }
  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

   Collective

   Input Parameters:
+  sf - The PetscSF
.
  map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
-  pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

   Notes:
   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
   n and N are local and global sizes of x respectively.

   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
   sequential vectors y on all ranks.

   With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
   sequential vector y on rank 0.

   In above cases, entries of x are roots and entries of y are leaves.

   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
   creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
   of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
   not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data
   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

   In this case, roots and leaves are symmetric.

   Level: intermediate
@*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map,2);
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL,comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %d",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* ALLTOALL ignores the user's map: build the canonical size-by-size layout ourselves */
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr   = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr   = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    ierr   = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRMPI(ierr);
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots  = n;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves in a gather */
    sf->nleaves = rank ? 0 : N;
    sf->nroots  = n;
    sf->nranks  = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves = size;
    sf->nroots  = size;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

   Collective

   Input Parameter:
.  sf - star forest to invert

   Output Parameter:
.  isf - inverse of sf

   Level: advanced

   Notes:
   All roots must have degree 1.

   The local space may be a permutation, but cannot be sparse.
.seealso: PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  /* Each leaf slot records its own (rank, index) so reducing it onto the roots tells every root who points at it */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* (-1,-1) marks roots no leaf points at */
  for (i=0; i<nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL;
  else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Parameters:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Parameter:
.  newsf - new communication object

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;
  MPI_Datatype   dtype=MPIU_SCALAR;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,(PetscInt*)ilocal,PETSC_COPY_VALUES,(PetscSFNode*)iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
  /* Since oldtype is committed, so is newtype, according to MPI */
  if (sf->vscat.bs > 1) {ierr = MPI_Type_dup(sf->vscat.unit,&dtype);CHKERRMPI(ierr);}
  (*newsf)->vscat.bs     = sf->vscat.bs;
  (*newsf)->vscat.unit   = dtype;
  (*newsf)->vscat.to_n   = sf->vscat.to_n;
  (*newsf)->vscat.from_n = sf->vscat.from_n;
  /* Do not copy lsf. Build it on demand since it is rarely used */

#if defined(PETSC_HAVE_DEVICE)
  (*newsf)->backend              = sf->backend;
  (*newsf)->unknown_input_stream= sf->unknown_input_stream;
  (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGraph - Get the graph specifying a parallel star forest

   Not Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.
ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage) 756 - iremote - remote locations of root vertices for each leaf on the current process 757 758 Notes: 759 We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet 760 761 The returned ilocal/iremote might contain values in different order than the input ones in PetscSFSetGraph(), 762 see its manpage for details. 763 764 Fortran Notes: 765 The returned iremote array is a copy and must be deallocated after use. Consequently, if you 766 want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array. 767 768 To check for a NULL ilocal use 769 $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then 770 771 Level: intermediate 772 773 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph() 774 @*/ 775 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 776 { 777 PetscErrorCode ierr; 778 779 PetscFunctionBegin; 780 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 781 if (sf->ops->GetGraph) { 782 ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr); 783 } else { 784 if (nroots) *nroots = sf->nroots; 785 if (nleaves) *nleaves = sf->nleaves; 786 if (ilocal) *ilocal = sf->mine; 787 if (iremote) *iremote = sf->remote; 788 } 789 PetscFunctionReturn(0); 790 } 791 792 /*@ 793 PetscSFGetLeafRange - Get the active leaf ranges 794 795 Not Collective 796 797 Input Parameter: 798 . sf - star forest 799 800 Output Parameters: 801 + minleaf - minimum active leaf on this process. Return 0 if there are no leaves. 802 - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves. 

   Level: developer

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  /* Both outputs are optional; minleaf/maxleaf are maintained by PetscSFSetGraph() */
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFViewFromOptions - View from Options

   Collective on PetscSF

   Input Parameters:
+  A - the star forest
.  obj - Optional object
-  name - command line option

   Level: intermediate

.seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
  /* Thin wrapper: defer to the generic PetscObject options-driven viewing */
  ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFView - view a star forest

   Collective

   Input Parameters:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Default to stdout on the SF's communicator when no viewer is supplied */
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Setting up first ensures rank/offset arrays printed below are populated */
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
      /* Synchronized printing so each rank's output appears in rank order */
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      /* One "leaf <- (rank,rootindex)" edge per line; NULL sf->mine means contiguous leaves */
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        /* Sort a copy of the rank list so edges are reported in ascending rank order */
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %" PetscInt_FMT " edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          /* rmine/rremote are concatenated per-rank; roffset[] delimits each rank's slice */
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  /* Give the implementation a chance to view type-specific data */
  if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
.  roffset - offset in rmine/rremote for each rank (length nranks+1)
.
rmine - concatenated array holding local indices referencing each remote rank 925 - rremote - concatenated array holding remote indices referenced for each remote rank 926 927 Level: developer 928 929 .seealso: PetscSFGetLeafRanks() 930 @*/ 931 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote) 932 { 933 PetscErrorCode ierr; 934 935 PetscFunctionBegin; 936 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 937 PetscCheckFalse(!sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 938 if (sf->ops->GetRootRanks) { 939 ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr); 940 } else { 941 /* The generic implementation */ 942 if (nranks) *nranks = sf->nranks; 943 if (ranks) *ranks = sf->ranks; 944 if (roffset) *roffset = sf->roffset; 945 if (rmine) *rmine = sf->rmine; 946 if (rremote) *rremote = sf->rremote; 947 } 948 PetscFunctionReturn(0); 949 } 950 951 /*@C 952 PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 953 954 Not Collective 955 956 Input Parameter: 957 . sf - star forest 958 959 Output Parameters: 960 + niranks - number of leaf ranks referencing roots on this process 961 . iranks - array of ranks 962 . 
ioffset - offset in irootloc for each rank (length niranks+1) 963 - irootloc - concatenated array holding local indices of roots referenced by each leaf rank 964 965 Level: developer 966 967 .seealso: PetscSFGetRootRanks() 968 @*/ 969 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 970 { 971 PetscErrorCode ierr; 972 973 PetscFunctionBegin; 974 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 975 PetscCheckFalse(!sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 976 if (sf->ops->GetLeafRanks) { 977 ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr); 978 } else { 979 PetscSFType type; 980 ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 981 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 982 } 983 PetscFunctionReturn(0); 984 } 985 986 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) { 987 PetscInt i; 988 for (i=0; i<n; i++) { 989 if (needle == list[i]) return PETSC_TRUE; 990 } 991 return PETSC_FALSE; 992 } 993 994 /*@C 995 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations. 

   Collective

   Input Parameters:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr);
  /* Count, per remote rank, how many of my leaves reference it (rank -> leaf count) */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank (PetscTable cannot store key 0) */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--; /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group   group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate the distinguished group's members into ranks of this SF's communicator */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRMPI(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRMPI(ierr);}
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished.
     Two-pointer scheme: i scans backward for a distinguished rank, sf->ndranks scans forward
     for a non-distinguished one; when both are found out of place, swap them. */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition by rank; roffset[] then gives each rank's slice of rmine/rremote */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0; /* reused below as a per-rank fill cursor */
  }
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit: leaves often arrive grouped by rank, so reuse the last lookup */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks (distinguished part first) */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    PetscCheckFalse(irank < 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %" PetscInt_FMT " in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  PetscCheckFalse(sf->nranks < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  /* Groups are computed lazily and cached on the SF (freed in PetscSFDestroy presumably) */
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks: build an auxiliary SF with one root and one
       leaf per referenced rank, then the root's degree equals the number of incoming ranks */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
    /* Enumerate the incoming ranks: each referencing process gathers its rank to my root */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRMPI(ierr);
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* Outgoing group is simply the ranks my leaves reference, already known locally */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRMPI(ierr);
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Parameter:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Parameter:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.
1179 1180 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering() 1181 @*/ 1182 PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi) 1183 { 1184 PetscErrorCode ierr; 1185 1186 PetscFunctionBegin; 1187 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1188 PetscValidPointer(multi,2); 1189 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */ 1190 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1191 *multi = sf->multi; 1192 sf->multi->multi = sf->multi; 1193 PetscFunctionReturn(0); 1194 } 1195 if (!sf->multi) { 1196 const PetscInt *indegree; 1197 PetscInt i,*inoffset,*outones,*outoffset,maxlocal; 1198 PetscSFNode *remote; 1199 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 1200 ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr); 1201 ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr); 1202 ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr); 1203 inoffset[0] = 0; 1204 for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i]; 1205 for (i=0; i<maxlocal; i++) outones[i] = 1; 1206 ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1207 ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1208 for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1209 if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */ 1210 for (i=0; i<sf->nroots; i++) { 1211 PetscCheckFalse(inoffset[i] + indegree[i] != inoffset[i+1],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp"); 1212 } 1213 } 1214 ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr); 1215 for (i=0; i<sf->nleaves; i++) { 1216 remote[i].rank = sf->remote[i].rank; 1217 remote[i].index = outoffset[sf->mine ? 
sf->mine[i] : i]; 1218 } 1219 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1220 sf->multi->multi = sf->multi; 1221 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1222 if (sf->rankorder) { /* Sort the ranks */ 1223 PetscMPIInt rank; 1224 PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree; 1225 PetscSFNode *newremote; 1226 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 1227 for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]); 1228 ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr); 1229 for (i=0; i<maxlocal; i++) outranks[i] = rank; 1230 ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr); 1231 ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr); 1232 /* Sort the incoming ranks at each vertex, build the inverse map */ 1233 for (i=0; i<sf->nroots; i++) { 1234 PetscInt j; 1235 for (j=0; j<indegree[i]; j++) tmpoffset[j] = j; 1236 ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr); 1237 for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1238 } 1239 ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr); 1240 ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr); 1241 ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr); 1242 for (i=0; i<sf->nleaves; i++) { 1243 newremote[i].rank = sf->remote[i].rank; 1244 newremote[i].index = newoutoffset[sf->mine ? 
sf->mine[i] : i]; 1245 } 1246 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1247 ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr); 1248 } 1249 ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr); 1250 } 1251 *multi = sf->multi; 1252 PetscFunctionReturn(0); 1253 } 1254 1255 /*@C 1256 PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices 1257 1258 Collective 1259 1260 Input Parameters: 1261 + sf - original star forest 1262 . nselected - number of selected roots on this process 1263 - selected - indices of the selected roots on this process 1264 1265 Output Parameter: 1266 . esf - new star forest 1267 1268 Level: advanced 1269 1270 Note: 1271 To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can 1272 be done by calling PetscSFGetGraph(). 1273 1274 .seealso: PetscSFSetGraph(), PetscSFGetGraph() 1275 @*/ 1276 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf) 1277 { 1278 PetscInt i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal; 1279 const PetscInt *ilocal; 1280 signed char *rootdata,*leafdata,*leafmem; 1281 const PetscSFNode *iremote; 1282 PetscSFNode *new_iremote; 1283 MPI_Comm comm; 1284 PetscErrorCode ierr; 1285 1286 PetscFunctionBegin; 1287 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1288 PetscSFCheckGraphSet(sf,1); 1289 if (nselected) PetscValidPointer(selected,3); 1290 PetscValidPointer(esf,4); 1291 1292 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1293 ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1294 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1295 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 1296 1297 if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or 
out of range indices */ 1298 PetscBool dups; 1299 ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr); 1300 PetscCheckFalse(dups,comm,PETSC_ERR_ARG_WRONG,"selected[] has dups"); 1301 for (i=0; i<nselected; i++) 1302 PetscCheckFalse(selected[i] < 0 || selected[i] >= nroots,comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")",selected[i],nroots); 1303 } 1304 1305 if (sf->ops->CreateEmbeddedRootSF) { 1306 ierr = (*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf);CHKERRQ(ierr); 1307 } else { 1308 /* A generic version of creating embedded sf */ 1309 ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 1310 maxlocal = maxleaf - minleaf + 1; 1311 ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr); 1312 leafdata = leafmem - minleaf; 1313 /* Tag selected roots and bcast to leaves */ 1314 for (i=0; i<nselected; i++) rootdata[selected[i]] = 1; 1315 ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 1316 ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 1317 1318 /* Build esf with leaves that are still connected */ 1319 esf_nleaves = 0; 1320 for (i=0; i<nleaves; i++) { 1321 j = ilocal ? ilocal[i] : i; 1322 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1323 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1324 */ 1325 esf_nleaves += (leafdata[j] ? 1 : 0); 1326 } 1327 ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr); 1328 ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr); 1329 for (i=n=0; i<nleaves; i++) { 1330 j = ilocal ? 
ilocal[i] : i; 1331 if (leafdata[j]) { 1332 new_ilocal[n] = j; 1333 new_iremote[n].rank = iremote[i].rank; 1334 new_iremote[n].index = iremote[i].index; 1335 ++n; 1336 } 1337 } 1338 ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr); 1339 ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr); 1340 ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1341 ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr); 1342 } 1343 ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1344 PetscFunctionReturn(0); 1345 } 1346 1347 /*@C 1348 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1349 1350 Collective 1351 1352 Input Parameters: 1353 + sf - original star forest 1354 . nselected - number of selected leaves on this process 1355 - selected - indices of the selected leaves on this process 1356 1357 Output Parameter: 1358 . newsf - new star forest 1359 1360 Level: advanced 1361 1362 .seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph() 1363 @*/ 1364 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1365 { 1366 const PetscSFNode *iremote; 1367 PetscSFNode *new_iremote; 1368 const PetscInt *ilocal; 1369 PetscInt i,nroots,*leaves,*new_ilocal; 1370 MPI_Comm comm; 1371 PetscErrorCode ierr; 1372 1373 PetscFunctionBegin; 1374 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1375 PetscSFCheckGraphSet(sf,1); 1376 if (nselected) PetscValidPointer(selected,3); 1377 PetscValidPointer(newsf,4); 1378 1379 /* Uniq selected[] and put results in leaves[] */ 1380 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1381 ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr); 1382 ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr); 1383 ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr); 1384 PetscCheckFalse(nselected && (leaves[0] < 0 || leaves[nselected-1] 
>= sf->nleaves),comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")",leaves[0],leaves[nselected-1],sf->nleaves); 1385 1386 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1387 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) { 1388 ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr); 1389 } else { 1390 ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr); 1391 ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr); 1392 ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr); 1393 for (i=0; i<nselected; ++i) { 1394 const PetscInt l = leaves[i]; 1395 new_ilocal[i] = ilocal ? ilocal[l] : l; 1396 new_iremote[i].rank = iremote[l].rank; 1397 new_iremote[i].index = iremote[l].index; 1398 } 1399 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr); 1400 ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1401 } 1402 ierr = PetscFree(leaves);CHKERRQ(ierr); 1403 PetscFunctionReturn(0); 1404 } 1405 1406 /*@C 1407 PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd() 1408 1409 Collective on PetscSF 1410 1411 Input Parameters: 1412 + sf - star forest on which to communicate 1413 . unit - data type associated with each node 1414 . rootdata - buffer to broadcast 1415 - op - operation to use for reduction 1416 1417 Output Parameter: 1418 . leafdata - buffer to be reduced with values from each leaf's respective root 1419 1420 Level: intermediate 1421 1422 Notes: 1423 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1424 are host pointers or device pointers, which may incur a noticable cost. 
If you already knew the info, you should
   use PetscSFBcastWithMemTypeBegin() instead.

.seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin()
@*/
PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* NOTE(review): event logging appears to be suppressed when vscat.logging is set,
     presumably because an enclosing VecScatter logs its own events -- confirm */
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
  /* Detect host vs device residency of each buffer, then dispatch to the SF type */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd()

   Collective on PetscSF

   Input Parameters:
+  sf - star forest on which to communicate
.  unit - data type associated with each node
.  rootmtype - memory type of rootdata
.  rootdata - buffer to broadcast
.  leafmtype - memory type of leafdata
-  op - operation to use for reduction

   Output Parameter:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFBcastEnd(), PetscSFBcastBegin()
@*/
PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
  /* Caller supplies the memory types, skipping the PetscGetMemType() queries */
  ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Parameter:
.
leafdata - buffer to be reduced with values from each leaf's respective root 1490 1491 Level: intermediate 1492 1493 .seealso: PetscSFSetGraph(), PetscSFReduceEnd() 1494 @*/ 1495 PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1496 { 1497 PetscErrorCode ierr; 1498 1499 PetscFunctionBegin; 1500 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1501 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);} 1502 ierr = (*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr); 1503 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);} 1504 PetscFunctionReturn(0); 1505 } 1506 1507 /*@C 1508 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 1509 1510 Collective 1511 1512 Input Parameters: 1513 + sf - star forest 1514 . unit - data type 1515 . leafdata - values to reduce 1516 - op - reduction operation 1517 1518 Output Parameter: 1519 . rootdata - result of reduction of values from all leaves of each root 1520 1521 Level: intermediate 1522 1523 Notes: 1524 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1525 are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1526 use PetscSFReduceWithMemTypeBegin() instead. 
1527 1528 .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin() 1529 @*/ 1530 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1531 { 1532 PetscErrorCode ierr; 1533 PetscMemType rootmtype,leafmtype; 1534 1535 PetscFunctionBegin; 1536 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1537 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1538 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);} 1539 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1540 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1541 ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr); 1542 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);} 1543 PetscFunctionReturn(0); 1544 } 1545 1546 /*@C 1547 PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd() 1548 1549 Collective 1550 1551 Input Parameters: 1552 + sf - star forest 1553 . unit - data type 1554 . leafmtype - memory type of leafdata 1555 . leafdata - values to reduce 1556 . rootmtype - memory type of rootdata 1557 - op - reduction operation 1558 1559 Output Parameter: 1560 . 
rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
@*/
PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* NOTE(review): event logging appears to be suppressed when an enclosing VecScatter
     (vscat.logging) logs its own events -- confirm */
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
  /* Caller supplies the memory types, skipping the PetscGetMemType() queries */
  ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Parameter:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);}
  /* Complete the communication started by the matching PetscSFReduceBegin() */
  ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
   integers.

.seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype,leafupdatemtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Infer the memory type (host/device) of each buffer from its address */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
  /* Implementations only handle leafdata and leafupdate residing in the same memory space */
  PetscCheckFalse(leafmtype != leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
  ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  rootmtype - memory type of rootdata
.  leafmtype - memory type of leafdata
.  leafdata - leaf values to use in reduction
.  leafupdatemtype - memory type of leafupdate
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note: See PetscSFFetchAndOpBegin() for more details.

.seealso: PetscSFFetchAndOpBegin(),PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscMemType leafupdatemtype,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Unlike PetscSFFetchAndOpBegin(), the memory types are supplied by the caller, not inferred;
     implementations still require leafdata and leafupdate to live in the same memory space */
  PetscCheckFalse(leafmtype != leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
  ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.
leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  /* No PetscSFSetUp() here: the matching Begin call has already set the SF up */
  ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    PetscCheckFalse(sf->degree,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf-sf->minleaf+1; /* number of slots in the contiguous local leaf range */
    ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* Summing a 1 per leaf onto the roots yields each root's degree; degreetmp is indexed relative
       to minleaf, so pass degreetmp-minleaf to let the reduction address leaves by absolute index */
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  *degree = NULL; /* result only becomes available in PetscSFComputeDegreeEnd() */
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFComputeDegreeBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscCheckFalse(!sf->degreetmp,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    /* Finish the reduction started in PetscSFComputeDegreeBegin(); same minleaf offset as there */
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    sf->degreeknown = PETSC_TRUE; /* cache the result; later calls return sf->degree directly */
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
   Each multi-root is assigned index of the corresponding original root.

   Collective

   Input Parameters:
+  sf - star forest
-  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

   Output Parameters:
+  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
-  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

   Level: developer

   Notes:
   The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1815 1816 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF() 1817 @*/ 1818 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1819 { 1820 PetscSF msf; 1821 PetscInt i, j, k, nroots, nmroots; 1822 PetscErrorCode ierr; 1823 1824 PetscFunctionBegin; 1825 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1826 ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 1827 if (nroots) PetscValidIntPointer(degree,2); 1828 if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3); 1829 PetscValidPointer(multiRootsOrigNumbering,4); 1830 ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr); 1831 ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr); 1832 ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr); 1833 for (i=0,j=0,k=0; i<nroots; i++) { 1834 if (!degree[i]) continue; 1835 for (j=0; j<degree[i]; j++,k++) { 1836 (*multiRootsOrigNumbering)[k] = i; 1837 } 1838 } 1839 PetscCheckFalse(k != nmroots,PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail"); 1840 if (nMultiRoots) *nMultiRoots = nmroots; 1841 PetscFunctionReturn(0); 1842 } 1843 1844 /*@C 1845 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 1846 1847 Collective 1848 1849 Input Parameters: 1850 + sf - star forest 1851 . unit - data type 1852 - leafdata - leaf data to gather to roots 1853 1854 Output Parameter: 1855 . 
multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  /* Gather is a reduce over the multi-SF with MPI_REPLACE; the multi-SF gives each root one slot per leaf */
  ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Parameter:
.  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  /* Complete the multi-SF reduction started in PetscSFGatherBegin() */
  ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Parameter:
.
leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  /* Scatter is a broadcast over the multi-SF: each leaf receives its personal slot of the multi-root buffer */
  ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Parameter:
.
leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  /* Complete the multi-SF broadcast started in PetscSFScatterBegin() */
  ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Verify (in debug builds only) that no two leaves of sf share the same local index; no-op otherwise */
static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
{
  PetscInt       i, n, nleaves;
  const PetscInt *ilocal = NULL;
  PetscHSetI     seen;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) {
    ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
    ierr = PetscHSetICreate(&seen);CHKERRQ(ierr);
    for (i = 0; i < nleaves; i++) {
      const PetscInt leaf = ilocal ? ilocal[i] : i; /* ilocal == NULL means contiguous leaves 0..nleaves-1 */
      ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
    }
    /* If the set is smaller than the leaf count, at least one leaf index was duplicated */
    ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
    PetscCheckFalse(n != nleaves,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
    ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

   Input Parameters:
+  sfA - The first PetscSF
-  sfB - The second PetscSF

   Output Parameters:
.  sfBA - The composite SF

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e. the same leaf is not connected with different roots.

   sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
   Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  /* If sfA's leaves are scattered (ilocal != NULL), lay its remote points out densely on sfB's
     root space; slots not reached by any sfA leaf are marked with rank/index -1 */
  if (localPointsA) {
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* sfA leaves outside sfB's root space cannot connect */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  /* Broadcast sfA root locations through sfB: each sfB leaf learns which sfA root (if any) reaches it.
     leafdataB-minleaf converts the compact buffer to absolute leaf indexing */
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* The composite is dense (ilocal == NULL) only if sfB was dense and every sfB leaf got a root */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA = NULL;
    remotePointsBA = leafdataB; /* ownership of leafdataB transfers to the new SF below */
  } else {
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue; /* leaf not connected to any sfA root */
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

   Input Parameters:
+  sfA - The first PetscSF
-  sfB - The second PetscSF

   Output Parameters:
.  sfBA - The composite SF.

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e. the same leaf is not connected with different roots.
Even more, all roots of the
   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
   a Reduce on sfB, on connected roots.

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op            op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool         iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr);
  if (iswin) op = MPI_REPLACE;
#endif

  /* Lay sfA's remote points out over sfB's contiguous leaf range; slots reached by no sfA leaf
     are marked with rank/index -1 */
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; /* outside sfB's leaf range */
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else { /* contiguous sfA leaves 0..numLeavesA-1 */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Reduce over sfB pushes each sfA root location from sfB's leaves up to sfB's roots;
     reorderedRemotePointsA-minleaf converts the compact buffer to absolute leaf indexing */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* Compact in place: keep only sfB roots that received a connection (rank != -1) */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  ierr =
PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
. out - The local PetscSF
*/
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm          comm;
  PetscMPIInt       myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt          i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode       *liremote;
  PetscSF           lsf;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->CreateLocalSF) {
    /* The implementation provides a specialized routine; defer to it */
    ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
  } else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&myrank);CHKERRMPI(ierr);

    /* Find out local edges and build a local SF */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
    /* First pass: count leaves whose root lives on this rank */
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);

    /* Second pass: copy the intra-process edges, remapping the owner to rank 0 of PETSC_COMM_SELF */
    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(lsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
    *out = lsf;
  }
  PetscFunctionReturn(0);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Infer the memory type (host/device) of each buffer from its address */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  if (sf->ops->BcastToZero) {
    ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  /* NOTE(review): the Begin event id is ended here as well — presumably intentional since this
     routine is synchronous (no separate End call); confirm against the logging conventions */
  ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}