1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/hashseti.h> 3 #include <petscctable.h> 4 5 #if defined(PETSC_HAVE_CUDA) 6 #include <cuda_runtime.h> 7 #endif 8 9 #if defined(PETSC_HAVE_HIP) 10 #include <hip/hip_runtime.h> 11 #endif 12 13 #if defined(PETSC_USE_DEBUG) 14 # define PetscSFCheckGraphSet(sf,arg) do { \ 15 if (PetscUnlikely(!(sf)->graphset)) \ 16 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \ 17 } while (0) 18 #else 19 # define PetscSFCheckGraphSet(sf,arg) do {} while (0) 20 #endif 21 22 const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL}; 23 24 PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *type) 25 { 26 PetscFunctionBegin; 27 PetscValidPointer(type,2); 28 *type = PETSC_MEMTYPE_HOST; 29 #if defined(PETSC_HAVE_CUDA) 30 if (PetscCUDAInitialized && data) { 31 cudaError_t cerr; 32 struct cudaPointerAttributes attr; 33 enum cudaMemoryType mtype; 34 cerr = cudaPointerGetAttributes(&attr,data); /* Do not check error since before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */ 35 cudaGetLastError(); /* Reset the last error */ 36 #if (CUDART_VERSION < 10000) 37 mtype = attr.memoryType; 38 #else 39 mtype = attr.type; 40 #endif 41 if (cerr == cudaSuccess && mtype == cudaMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE; 42 } 43 #endif 44 45 #if defined(PETSC_HAVE_HIP) 46 if (PetscHIPInitialized && data) { 47 hipError_t cerr; 48 struct hipPointerAttribute_t attr; 49 enum hipMemoryType mtype; 50 cerr = hipPointerGetAttributes(&attr,data); /* Do not check error since before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */ 51 hipGetLastError(); /* Reset the last error */ 52 mtype = attr.memoryType; 53 if (cerr == hipSuccess && mtype == hipMemoryTypeDevice) *type = 
PETSC_MEMTYPE_DEVICE; 54 } 55 #endif 56 PetscFunctionReturn(0); 57 } 58 59 /*@ 60 PetscSFCreate - create a star forest communication context 61 62 Collective 63 64 Input Arguments: 65 . comm - communicator on which the star forest will operate 66 67 Output Arguments: 68 . sf - new star forest context 69 70 Options Database Keys: 71 + -sf_type basic -Use MPI persistent Isend/Irecv for communication (Default) 72 . -sf_type window -Use MPI-3 one-sided window for communication 73 - -sf_type neighbor -Use MPI-3 neighborhood collectives for communication 74 75 Level: intermediate 76 77 Notes: 78 When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv, 79 MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special 80 SFs are optimized and they have better performance than general SFs. 81 82 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy() 83 @*/ 84 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf) 85 { 86 PetscErrorCode ierr; 87 PetscSF b; 88 89 PetscFunctionBegin; 90 PetscValidPointer(sf,2); 91 ierr = PetscSFInitializePackage();CHKERRQ(ierr); 92 93 ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr); 94 95 b->nroots = -1; 96 b->nleaves = -1; 97 b->minleaf = PETSC_MAX_INT; 98 b->maxleaf = PETSC_MIN_INT; 99 b->nranks = -1; 100 b->rankorder = PETSC_TRUE; 101 b->ingroup = MPI_GROUP_NULL; 102 b->outgroup = MPI_GROUP_NULL; 103 b->graphset = PETSC_FALSE; 104 #if defined(PETSC_HAVE_DEVICE) 105 b->use_gpu_aware_mpi = use_gpu_aware_mpi; 106 b->use_stream_aware_mpi = PETSC_FALSE; 107 b->use_default_stream = PETSC_TRUE; /* The assumption is true for PETSc internal use of SF */ 108 #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 109 b->backend = PETSCSF_BACKEND_KOKKOS; 110 #elif defined(PETSC_HAVE_CUDA) 111 b->backend = PETSCSF_BACKEND_CUDA; 112 #endif 113 #endif 114 
*sf = b; 115 PetscFunctionReturn(0); 116 } 117 118 /*@ 119 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 120 121 Collective 122 123 Input Arguments: 124 . sf - star forest 125 126 Level: advanced 127 128 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy() 129 @*/ 130 PetscErrorCode PetscSFReset(PetscSF sf) 131 { 132 PetscErrorCode ierr; 133 134 PetscFunctionBegin; 135 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 136 if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);} 137 sf->nroots = -1; 138 sf->nleaves = -1; 139 sf->minleaf = PETSC_MAX_INT; 140 sf->maxleaf = PETSC_MIN_INT; 141 sf->mine = NULL; 142 sf->remote = NULL; 143 sf->graphset = PETSC_FALSE; 144 ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr); 145 ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr); 146 sf->nranks = -1; 147 ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr); 148 #if defined(PETSC_HAVE_DEVICE) 149 for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);CHKERRQ(ierr);} 150 #endif 151 sf->degreeknown = PETSC_FALSE; 152 ierr = PetscFree(sf->degree);CHKERRQ(ierr); 153 if (sf->ingroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);} 154 if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);} 155 ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr); 156 ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr); 157 sf->setupcalled = PETSC_FALSE; 158 PetscFunctionReturn(0); 159 } 160 161 /*@C 162 PetscSFSetType - Set the PetscSF communication implementation 163 164 Collective on PetscSF 165 166 Input Parameters: 167 + sf - the PetscSF context 168 - type - a known method 169 170 Options Database Key: 171 . 
-sf_type <type> - Sets the method; use -help for a list 172 of available methods (for instance, window, basic, neighbor) 173 174 Notes: 175 See "include/petscsf.h" for available methods (for instance) 176 + PETSCSFWINDOW - MPI-2/3 one-sided 177 - PETSCSFBASIC - basic implementation using MPI-1 two-sided 178 179 Level: intermediate 180 181 .seealso: PetscSFType, PetscSFCreate() 182 @*/ 183 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type) 184 { 185 PetscErrorCode ierr,(*r)(PetscSF); 186 PetscBool match; 187 188 PetscFunctionBegin; 189 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 190 PetscValidCharPointer(type,2); 191 192 ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr); 193 if (match) PetscFunctionReturn(0); 194 195 ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr); 196 if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type); 197 /* Destroy the previous PetscSF implementation context */ 198 if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);} 199 ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr); 200 ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr); 201 ierr = (*r)(sf);CHKERRQ(ierr); 202 PetscFunctionReturn(0); 203 } 204 205 /*@C 206 PetscSFGetType - Get the PetscSF communication implementation 207 208 Not Collective 209 210 Input Parameter: 211 . sf - the PetscSF context 212 213 Output Parameter: 214 . type - the PetscSF type name 215 216 Level: intermediate 217 218 .seealso: PetscSFSetType(), PetscSFCreate() 219 @*/ 220 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 221 { 222 PetscFunctionBegin; 223 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1); 224 PetscValidPointer(type,2); 225 *type = ((PetscObject)sf)->type_name; 226 PetscFunctionReturn(0); 227 } 228 229 /*@ 230 PetscSFDestroy - destroy star forest 231 232 Collective 233 234 Input Arguments: 235 . 
sf - address of star forest 236 237 Level: intermediate 238 239 .seealso: PetscSFCreate(), PetscSFReset() 240 @*/ 241 PetscErrorCode PetscSFDestroy(PetscSF *sf) 242 { 243 PetscErrorCode ierr; 244 245 PetscFunctionBegin; 246 if (!*sf) PetscFunctionReturn(0); 247 PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1); 248 if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);} 249 ierr = PetscSFReset(*sf);CHKERRQ(ierr); 250 if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);} 251 ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr); 252 PetscFunctionReturn(0); 253 } 254 255 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 256 { 257 PetscInt i, nleaves; 258 PetscMPIInt size; 259 const PetscInt *ilocal; 260 const PetscSFNode *iremote; 261 PetscErrorCode ierr; 262 263 PetscFunctionBegin; 264 if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0); 265 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 266 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr); 267 for (i = 0; i < nleaves; i++) { 268 const PetscInt rank = iremote[i].rank; 269 const PetscInt remote = iremote[i].index; 270 const PetscInt leaf = ilocal ? ilocal[i] : i; 271 if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size); 272 if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i); 273 if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i); 274 } 275 PetscFunctionReturn(0); 276 } 277 278 279 280 281 /*@ 282 PetscSFSetUp - set up communication structures 283 284 Collective 285 286 Input Arguments: 287 . 
sf - star forest communication object 288 289 Level: beginner 290 291 .seealso: PetscSFSetFromOptions(), PetscSFSetType() 292 @*/ 293 PetscErrorCode PetscSFSetUp(PetscSF sf) 294 { 295 PetscErrorCode ierr; 296 297 PetscFunctionBegin; 298 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 299 PetscSFCheckGraphSet(sf,1); 300 if (sf->setupcalled) PetscFunctionReturn(0); 301 ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 302 ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr); 303 if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} /* Zero all sf->ops */ 304 if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);} 305 #if defined(PETSC_HAVE_CUDA) 306 if (sf->backend == PETSCSF_BACKEND_CUDA) { 307 sf->ops->Malloc = PetscSFMalloc_Cuda; 308 sf->ops->Free = PetscSFFree_Cuda; 309 } 310 #endif 311 312 #if defined(PETSC_HAVE_KOKKOS) 313 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 314 sf->ops->Malloc = PetscSFMalloc_Kokkos; 315 sf->ops->Free = PetscSFFree_Kokkos; 316 } 317 #endif 318 ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 319 sf->setupcalled = PETSC_TRUE; 320 PetscFunctionReturn(0); 321 } 322 323 /*@ 324 PetscSFSetFromOptions - set PetscSF options using the options database 325 326 Logically Collective 327 328 Input Arguments: 329 . sf - star forest 330 331 Options Database Keys: 332 + -sf_type - implementation type, see PetscSFSetType() 333 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 334 . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also 335 use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true). 336 If true, this option only works with -use_gpu_aware_mpi 1. 337 . 
-sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false). 338 If true, this option only works with -use_gpu_aware_mpi 1. 339 340 - -sf_backend cuda | kokkos -Select the device backend SF uses. Currently SF has two backends: cuda and Kokkos. 341 On CUDA devices, one can choose cuda or kokkos with the default being cuda. On other devices, 342 the only available is kokkos. 343 344 Level: intermediate 345 @*/ 346 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 347 { 348 PetscSFType deft; 349 char type[256]; 350 PetscErrorCode ierr; 351 PetscBool flg; 352 353 PetscFunctionBegin; 354 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 355 ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr); 356 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 357 ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr); 358 ierr = PetscSFSetType(sf,flg ? 
type : deft);CHKERRQ(ierr); 359 ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr); 360 #if defined(PETSC_HAVE_DEVICE) 361 { 362 char backendstr[32] = {0}; 363 PetscBool isCuda = PETSC_FALSE,isKokkos = PETSC_FALSE,set; 364 /* Change the defaults set in PetscSFCreate() with command line options */ 365 ierr = PetscOptionsBool("-sf_use_default_stream","Assume SF's input and output root/leafdata is computed on the default stream","PetscSFSetFromOptions",sf->use_default_stream,&sf->use_default_stream,NULL);CHKERRQ(ierr); 366 ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr); 367 ierr = PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);CHKERRQ(ierr); 368 ierr = PetscStrcasecmp("cuda",backendstr,&isCuda);CHKERRQ(ierr); 369 ierr = PetscStrcasecmp("kokkos",backendstr,&isKokkos);CHKERRQ(ierr); 370 #if defined(PETSC_HAVE_CUDA) 371 if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 372 else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 373 else if (set) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported on cuda devices. You may choose cuda or kokkos (if installed)", backendstr); 374 #elif defined(PETSC_HAVE_KOKKOS) 375 if (set && !isKokkos) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. 
You can only choose kokkos", backendstr); 376 #endif 377 } 378 #endif 379 if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);} 380 ierr = PetscOptionsEnd();CHKERRQ(ierr); 381 PetscFunctionReturn(0); 382 } 383 384 /*@ 385 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 386 387 Logically Collective 388 389 Input Arguments: 390 + sf - star forest 391 - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic) 392 393 Level: advanced 394 395 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin() 396 @*/ 397 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg) 398 { 399 PetscFunctionBegin; 400 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 401 PetscValidLogicalCollectiveBool(sf,flg,2); 402 if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 403 sf->rankorder = flg; 404 PetscFunctionReturn(0); 405 } 406 407 /*@ 408 PetscSFSetGraph - Set a parallel star forest 409 410 Collective 411 412 Input Arguments: 413 + sf - star forest 414 . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 415 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 416 . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced 417 during setup in debug mode) 418 . localmode - copy mode for ilocal 419 . 
iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 420 during setup in debug mode) 421 - remotemode - copy mode for iremote 422 423 Level: intermediate 424 425 Notes: 426 In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode 427 428 Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they 429 encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not 430 needed 431 432 Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf 433 indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode). 434 435 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph() 436 @*/ 437 PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode) 438 { 439 PetscErrorCode ierr; 440 441 PetscFunctionBegin; 442 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 443 if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4); 444 if (nleaves > 0) PetscValidPointer(iremote,6); 445 if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots); 446 if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves); 447 448 if (sf->nroots >= 0) { /* Reset only if graph already set */ 449 ierr = PetscSFReset(sf);CHKERRQ(ierr); 450 } 451 452 ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 453 454 sf->nroots = nroots; 455 sf->nleaves = nleaves; 456 457 if (nleaves && ilocal) { 458 PetscInt i; 459 PetscInt minleaf = PETSC_MAX_INT; 460 PetscInt maxleaf = PETSC_MIN_INT; 461 int contiguous = 1; 462 for (i=0; i<nleaves; i++) { 463 minleaf = PetscMin(minleaf,ilocal[i]); 464 maxleaf = 
PetscMax(maxleaf,ilocal[i]); 465 contiguous &= (ilocal[i] == i); 466 } 467 sf->minleaf = minleaf; 468 sf->maxleaf = maxleaf; 469 if (contiguous) { 470 if (localmode == PETSC_OWN_POINTER) { 471 ierr = PetscFree(ilocal);CHKERRQ(ierr); 472 } 473 ilocal = NULL; 474 } 475 } else { 476 sf->minleaf = 0; 477 sf->maxleaf = nleaves - 1; 478 } 479 480 if (ilocal) { 481 switch (localmode) { 482 case PETSC_COPY_VALUES: 483 ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr); 484 ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr); 485 sf->mine = sf->mine_alloc; 486 break; 487 case PETSC_OWN_POINTER: 488 sf->mine_alloc = (PetscInt*)ilocal; 489 sf->mine = sf->mine_alloc; 490 break; 491 case PETSC_USE_POINTER: 492 sf->mine_alloc = NULL; 493 sf->mine = (PetscInt*)ilocal; 494 break; 495 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode"); 496 } 497 } 498 499 switch (remotemode) { 500 case PETSC_COPY_VALUES: 501 ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr); 502 ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr); 503 sf->remote = sf->remote_alloc; 504 break; 505 case PETSC_OWN_POINTER: 506 sf->remote_alloc = (PetscSFNode*)iremote; 507 sf->remote = sf->remote_alloc; 508 break; 509 case PETSC_USE_POINTER: 510 sf->remote_alloc = NULL; 511 sf->remote = (PetscSFNode*)iremote; 512 break; 513 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode"); 514 } 515 516 ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 517 sf->graphset = PETSC_TRUE; 518 PetscFunctionReturn(0); 519 } 520 521 /*@ 522 PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern 523 524 Collective 525 526 Input Parameters: 527 + sf - The PetscSF 528 . 
map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL) 529 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL 530 531 Notes: 532 It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map. 533 n and N are local and global sizes of x respectively. 534 535 With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to 536 sequential vectors y on all ranks. 537 538 With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a 539 sequential vector y on rank 0. 540 541 In above cases, entries of x are roots and entries of y are leaves. 542 543 With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine 544 creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 545 of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does 546 not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data 547 items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines. 548 549 In this case, roots and leaves are symmetric. 
550 551 Level: intermediate 552 @*/ 553 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern) 554 { 555 MPI_Comm comm; 556 PetscInt n,N,res[2]; 557 PetscMPIInt rank,size; 558 PetscSFType type; 559 PetscErrorCode ierr; 560 561 PetscFunctionBegin; 562 ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr); 563 if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern); 564 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 565 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 566 567 if (pattern == PETSCSF_PATTERN_ALLTOALL) { 568 type = PETSCSFALLTOALL; 569 ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr); 570 ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr); 571 ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr); 572 ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr); 573 } else { 574 ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr); 575 ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr); 576 res[0] = n; 577 res[1] = -n; 578 /* Check if n are same over all ranks so that we can optimize it */ 579 ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 580 if (res[0] == -res[1]) { /* same n */ 581 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 582 } else { 583 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 584 } 585 ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr); 586 } 587 ierr = PetscSFSetType(sf,type);CHKERRQ(ierr); 588 589 sf->pattern = pattern; 590 sf->mine = NULL; /* Contiguous */ 591 592 /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 593 Also set other easy stuff. 
594 */ 595 if (pattern == PETSCSF_PATTERN_ALLGATHER) { 596 sf->nleaves = N; 597 sf->nroots = n; 598 sf->nranks = size; 599 sf->minleaf = 0; 600 sf->maxleaf = N - 1; 601 } else if (pattern == PETSCSF_PATTERN_GATHER) { 602 sf->nleaves = rank ? 0 : N; 603 sf->nroots = n; 604 sf->nranks = rank ? 0 : size; 605 sf->minleaf = 0; 606 sf->maxleaf = rank ? -1 : N - 1; 607 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 608 sf->nleaves = size; 609 sf->nroots = size; 610 sf->nranks = size; 611 sf->minleaf = 0; 612 sf->maxleaf = size - 1; 613 } 614 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 615 sf->graphset = PETSC_TRUE; 616 PetscFunctionReturn(0); 617 } 618 619 /*@ 620 PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map 621 622 Collective 623 624 Input Arguments: 625 626 . sf - star forest to invert 627 628 Output Arguments: 629 . isf - inverse of sf 630 Level: advanced 631 632 Notes: 633 All roots must have degree 1. 634 635 The local space may be a permutation, but cannot be sparse. 
636 637 .seealso: PetscSFSetGraph() 638 @*/ 639 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf) 640 { 641 PetscErrorCode ierr; 642 PetscMPIInt rank; 643 PetscInt i,nroots,nleaves,maxlocal,count,*newilocal; 644 const PetscInt *ilocal; 645 PetscSFNode *roots,*leaves; 646 647 PetscFunctionBegin; 648 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 649 PetscSFCheckGraphSet(sf,1); 650 PetscValidPointer(isf,2); 651 652 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 653 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 654 655 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 656 ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr); 657 for (i=0; i<maxlocal; i++) { 658 leaves[i].rank = rank; 659 leaves[i].index = i; 660 } 661 for (i=0; i <nroots; i++) { 662 roots[i].rank = -1; 663 roots[i].index = -1; 664 } 665 ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr); 666 ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr); 667 668 /* Check whether our leaves are sparse */ 669 for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++; 670 if (count == nroots) newilocal = NULL; 671 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). 
*/ 672 ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr); 673 for (i=0,count=0; i<nroots; i++) { 674 if (roots[i].rank >= 0) { 675 newilocal[count] = i; 676 roots[count].rank = roots[i].rank; 677 roots[count].index = roots[i].index; 678 count++; 679 } 680 } 681 } 682 683 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr); 684 ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr); 685 ierr = PetscFree2(roots,leaves);CHKERRQ(ierr); 686 PetscFunctionReturn(0); 687 } 688 689 /*@ 690 PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph 691 692 Collective 693 694 Input Arguments: 695 + sf - communication object to duplicate 696 - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption) 697 698 Output Arguments: 699 . newsf - new communication object 700 701 Level: beginner 702 703 .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph() 704 @*/ 705 PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf) 706 { 707 PetscSFType type; 708 PetscErrorCode ierr; 709 710 PetscFunctionBegin; 711 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 712 PetscValidLogicalCollectiveEnum(sf,opt,2); 713 PetscValidPointer(newsf,3); 714 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr); 715 ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 716 if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);} 717 if (opt == PETSCSF_DUPLICATE_GRAPH) { 718 PetscSFCheckGraphSet(sf,1); 719 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 720 PetscInt nroots,nleaves; 721 const PetscInt *ilocal; 722 const PetscSFNode *iremote; 723 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 724 ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr); 725 } else { 726 ierr = 
PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr); 727 } 728 } 729 #if defined(PETSC_HAVE_DEVICE) 730 (*newsf)->backend = sf->backend; 731 (*newsf)->use_default_stream = sf->use_default_stream; 732 (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi; 733 (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi; 734 #endif 735 if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);} 736 /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */ 737 PetscFunctionReturn(0); 738 } 739 740 /*@C 741 PetscSFGetGraph - Get the graph specifying a parallel star forest 742 743 Not Collective 744 745 Input Arguments: 746 . sf - star forest 747 748 Output Arguments: 749 + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 750 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 751 . ilocal - locations of leaves in leafdata buffers 752 - iremote - remote locations of root vertices for each leaf on the current process 753 754 Notes: 755 We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet 756 757 When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you 758 want to update the graph, you must call PetscSFSetGraph after modifying the iremote array. 
   Level: intermediate

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->GetGraph) {
    /* Implementation-specific graphs (e.g. pattern SFs) materialize the graph on demand */
    ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
  } else {
    /* Default: return the cached graph; all output arguments are optional */
    if (nroots) *nroots = sf->nroots;
    if (nleaves) *nleaves = sf->nleaves;
    if (ilocal) *ilocal = sf->mine;
    if (iremote) *iremote = sf->remote;
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetLeafRange - Get the active leaf ranges

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
-  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.

   Level: developer

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFViewFromOptions - View from Options

   Collective on PetscSF

   Input Parameters:
+  A - the star forest
.
obj - Optional object
-  name - command line option

   Level: intermediate

.seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
  ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFView - view a star forest

   Collective

   Input Arguments:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      /* One line per leaf: local index <- (owner rank, remote root index) */
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        /* Print edges grouped by (sorted) remote rank; sort a copy so sf->ranks is untouched */
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
.
roffset - offset in rmine/rremote for each rank (length nranks+1)
.  rmine - concatenated array holding local indices referencing each remote rank
-  rremote - concatenated array holding remote indices referenced for each remote rank

   Level: developer

.seealso: PetscSFGetLeafRanks()
@*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Rank data is built during PetscSFSetUp() (see PetscSFSetUpRanks()) */
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetRootRanks) {
    ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
  } else {
    /* The generic implementation */
    if (nranks)  *nranks  = sf->nranks;
    if (ranks)   *ranks   = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine)   *rmine   = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  niranks - number of leaf ranks referencing roots on this process
.  iranks - array of ranks
.
ioffset - offset in irootloc for each rank (length niranks+1)
-  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetLeafRanks) {
    ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
  } else {
    /* Root-side (two-sided) rank info has no generic fallback; only some SF types provide it */
    PetscSFType type;
    ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
  }
  PetscFunctionReturn(0);
}

/* Linear membership test: PETSC_TRUE iff needle occurs in list[0..n-1] */
static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
  PetscInt i;
  for (i=0; i<n; i++) {
    if (needle == list[i]) return PETSC_TRUE;
  }
  return PETSC_FALSE;
}

/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

   Collective

   Input Arguments:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  /* Count leaves per remote rank using a hash table (rank -> leaf count) */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank, since PetscTable cannot store key 0 */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--; /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group   group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate dgroup's member ranks into ranks of this SF's communicator */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished.
     Two-pointer sweep: i scans from the back for distinguished ranks, sf->ndranks scans from
     the front for non-distinguished ones; mismatched pairs are swapped. */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition independently so distinguished ranks stay first */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0; /* reused below as a per-rank fill cursor */
  }
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit: consecutive leaves often reference the same rank, so reuse irank */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks (distinguished part first) */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Argument:
.  sf - star forest

   Output Arguments:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks: build an auxiliary SF with one root and
       one leaf per referenced rank, so the root degree equals the incoming-rank count */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
    /* Enumerate the incoming ranks: each process gathers the rank ids of its referencing processes */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* Outgoing ranks are directly available as sf->ranks */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetMultiSF - gets the inner SF implemeting gathers and scatters

   Collective

   Input Argument:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Arguments:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.

.seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
    ierr   = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    *multi = sf->multi;
    PetscFunctionReturn(0);
  }
  if (!sf->multi) { /* Lazily build and cache the multi-SF on first request */
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    /* inoffset[] = prefix sum of root degrees = starting multi-root index of each original root */
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Fetch-and-add of 1 hands each leaf a unique multi-root index within its root's slot range */
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
      for (i=0; i<sf->nroots; i++) {
        if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
      }
    }
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) { /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      /* Reduce each leaf's owning rank onto its multi-root so we can sort by sender */
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Broadcast the permuted multi-root indices back to the leaves */
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
    }
    ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices

   Collective

   Input Arguments:
+  sf - original star forest
.  nselected - number of selected roots on this process
-  selected - indices of the selected roots on this process

   Output Arguments:
.  esf - new star forest

   Level: advanced

   Note:
   To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
   be done by calling PetscSFGetGraph().

.seealso: PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
{
  PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
  const PetscInt    *ilocal;
  signed char       *rootdata,*leafdata,*leafmem;
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  MPI_Comm          comm;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (nselected) PetscValidPointer(selected,3);
  PetscValidPointer(esf,4);

  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);

  if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of
range indices */ 1285 PetscBool dups; 1286 ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr); 1287 if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups"); 1288 for (i=0; i<nselected; i++) 1289 if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %D is out of [0,%D)",selected[i],nroots); 1290 } 1291 1292 if (sf->ops->CreateEmbeddedSF) { 1293 ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,selected,esf);CHKERRQ(ierr); 1294 } else { 1295 /* A generic version of creating embedded sf */ 1296 ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 1297 maxlocal = maxleaf - minleaf + 1; 1298 ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr); 1299 leafdata = leafmem - minleaf; 1300 /* Tag selected roots and bcast to leaves */ 1301 for (i=0; i<nselected; i++) rootdata[selected[i]] = 1; 1302 ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr); 1303 ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr); 1304 1305 /* Build esf with leaves that are still connected */ 1306 esf_nleaves = 0; 1307 for (i=0; i<nleaves; i++) { 1308 j = ilocal ? ilocal[i] : i; 1309 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1310 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1311 */ 1312 esf_nleaves += (leafdata[j] ? 1 : 0); 1313 } 1314 ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr); 1315 ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr); 1316 for (i=n=0; i<nleaves; i++) { 1317 j = ilocal ? 
ilocal[i] : i; 1318 if (leafdata[j]) { 1319 new_ilocal[n] = j; 1320 new_iremote[n].rank = iremote[i].rank; 1321 new_iremote[n].index = iremote[i].index; 1322 ++n; 1323 } 1324 } 1325 ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr); 1326 ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr); 1327 ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1328 ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr); 1329 } 1330 ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1331 PetscFunctionReturn(0); 1332 } 1333 1334 /*@C 1335 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1336 1337 Collective 1338 1339 Input Arguments: 1340 + sf - original star forest 1341 . nselected - number of selected leaves on this process 1342 - selected - indices of the selected leaves on this process 1343 1344 Output Arguments: 1345 . newsf - new star forest 1346 1347 Level: advanced 1348 1349 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph() 1350 @*/ 1351 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1352 { 1353 const PetscSFNode *iremote; 1354 PetscSFNode *new_iremote; 1355 const PetscInt *ilocal; 1356 PetscInt i,nroots,*leaves,*new_ilocal; 1357 MPI_Comm comm; 1358 PetscErrorCode ierr; 1359 1360 PetscFunctionBegin; 1361 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1362 PetscSFCheckGraphSet(sf,1); 1363 if (nselected) PetscValidPointer(selected,3); 1364 PetscValidPointer(newsf,4); 1365 1366 /* Uniq selected[] and put results in leaves[] */ 1367 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1368 ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr); 1369 ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr); 1370 ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr); 1371 if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) 
SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves); 1372 1373 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1374 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) { 1375 ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr); 1376 } else { 1377 ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr); 1378 ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr); 1379 ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr); 1380 for (i=0; i<nselected; ++i) { 1381 const PetscInt l = leaves[i]; 1382 new_ilocal[i] = ilocal ? ilocal[l] : l; 1383 new_iremote[i].rank = iremote[l].rank; 1384 new_iremote[i].index = iremote[l].index; 1385 } 1386 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr); 1387 ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1388 } 1389 ierr = PetscFree(leaves);CHKERRQ(ierr); 1390 PetscFunctionReturn(0); 1391 } 1392 1393 /*@C 1394 PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd() 1395 1396 Collective on PetscSF 1397 1398 Input Arguments: 1399 + sf - star forest on which to communicate 1400 . unit - data type associated with each node 1401 . rootdata - buffer to broadcast 1402 - op - operation to use for reduction 1403 1404 Output Arguments: 1405 . 
leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Detect host vs device buffers so the implementation can pick the right pack/unpack path */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Arguments:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFReduceEnd()
@*/
PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Arguments:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFBcastBegin()
@*/
PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Arguments:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
  ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Arguments:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
   integers.
1543 1544 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph() 1545 @*/ 1546 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1547 { 1548 PetscErrorCode ierr; 1549 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1550 1551 PetscFunctionBegin; 1552 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1553 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1554 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1555 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1556 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1557 ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr); 1558 if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1559 ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr); 1560 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1561 PetscFunctionReturn(0); 1562 } 1563 1564 /*@C 1565 PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value 1566 1567 Collective 1568 1569 Input Arguments: 1570 + sf - star forest 1571 . unit - data type 1572 . 
leafdata - leaf values to use in reduction 1573 - op - operation to use for reduction 1574 1575 Output Arguments: 1576 + rootdata - root values to be updated, input state is seen by first process to perform an update 1577 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1578 1579 Level: advanced 1580 1581 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph() 1582 @*/ 1583 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1584 { 1585 PetscErrorCode ierr; 1586 1587 PetscFunctionBegin; 1588 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1589 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1590 ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr); 1591 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1592 PetscFunctionReturn(0); 1593 } 1594 1595 /*@C 1596 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd() 1597 1598 Collective 1599 1600 Input Arguments: 1601 . sf - star forest 1602 1603 Output Arguments: 1604 . degree - degree of each root vertex 1605 1606 Level: advanced 1607 1608 Notes: 1609 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 
1610 1611 .seealso: PetscSFGatherBegin() 1612 @*/ 1613 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree) 1614 { 1615 PetscErrorCode ierr; 1616 1617 PetscFunctionBegin; 1618 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1619 PetscSFCheckGraphSet(sf,1); 1620 PetscValidPointer(degree,2); 1621 if (!sf->degreeknown) { 1622 PetscInt i, nroots = sf->nroots, maxlocal; 1623 if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1624 maxlocal = sf->maxleaf-sf->minleaf+1; 1625 ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr); 1626 ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1627 for (i=0; i<nroots; i++) sf->degree[i] = 0; 1628 for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1; 1629 ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr); 1630 } 1631 *degree = NULL; 1632 PetscFunctionReturn(0); 1633 } 1634 1635 /*@C 1636 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin() 1637 1638 Collective 1639 1640 Input Arguments: 1641 . sf - star forest 1642 1643 Output Arguments: 1644 . degree - degree of each root vertex 1645 1646 Level: developer 1647 1648 Notes: 1649 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 
.seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    /* degreetmp is allocated by PetscSFComputeDegreeBegin(); its absence means Begin was never called */
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    sf->degreeknown = PETSC_TRUE; /* cache the result; later calls return sf->degree without communication */
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}


/*@C
   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
   Each multi-root is assigned index of the corresponding original root.

   Collective

   Input Arguments:
+  sf - star forest
-  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

   Output Arguments:
+  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
-  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

   Level: developer

   Notes:
   The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1690 1691 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF() 1692 @*/ 1693 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1694 { 1695 PetscSF msf; 1696 PetscInt i, j, k, nroots, nmroots; 1697 PetscErrorCode ierr; 1698 1699 PetscFunctionBegin; 1700 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1701 ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 1702 if (nroots) PetscValidIntPointer(degree,2); 1703 if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3); 1704 PetscValidPointer(multiRootsOrigNumbering,4); 1705 ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr); 1706 ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr); 1707 ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr); 1708 for (i=0,j=0,k=0; i<nroots; i++) { 1709 if (!degree[i]) continue; 1710 for (j=0; j<degree[i]; j++,k++) { 1711 (*multiRootsOrigNumbering)[k] = i; 1712 } 1713 } 1714 if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail"); 1715 if (nMultiRoots) *nMultiRoots = nmroots; 1716 PetscFunctionReturn(0); 1717 } 1718 1719 /*@C 1720 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 1721 1722 Collective 1723 1724 Input Arguments: 1725 + sf - star forest 1726 . unit - data type 1727 - leafdata - leaf data to gather to roots 1728 1729 Output Argument: 1730 . 
multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Gather is implemented as a reduce with MPIU_REPLACE over the multi-SF obtained from sf */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Argument:
.  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.
leafdata - leaf data to be update with personal data from each respective root 1790 1791 Level: intermediate 1792 1793 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1794 @*/ 1795 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1796 { 1797 PetscErrorCode ierr; 1798 PetscSF multi = NULL; 1799 1800 PetscFunctionBegin; 1801 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1802 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1803 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1804 ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr); 1805 PetscFunctionReturn(0); 1806 } 1807 1808 /*@C 1809 PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin() 1810 1811 Collective 1812 1813 Input Arguments: 1814 + sf - star forest 1815 . unit - data type 1816 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1817 1818 Output Argument: 1819 . leafdata - leaf data to be update with personal data from each respective root 1820 1821 Level: intermediate 1822 1823 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1824 @*/ 1825 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1826 { 1827 PetscErrorCode ierr; 1828 PetscSF multi = NULL; 1829 1830 PetscFunctionBegin; 1831 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1832 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1833 ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr); 1834 PetscFunctionReturn(0); 1835 } 1836 1837 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 1838 { 1839 PetscInt i, n, nleaves; 1840 const PetscInt *ilocal = NULL; 1841 PetscHSetI seen; 1842 PetscErrorCode ierr; 1843 1844 PetscFunctionBegin; 1845 if (!PetscDefined(USE_DEBUG)) PetscFunctionReturn(0); 1846 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 1847 ierr = 
PetscHSetICreate(&seen);CHKERRQ(ierr); 1848 for (i = 0; i < nleaves; i++) { 1849 const PetscInt leaf = ilocal ? ilocal[i] : i; 1850 ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr); 1851 } 1852 ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr); 1853 if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique"); 1854 ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr); 1855 PetscFunctionReturn(0); 1856 } 1857 1858 /*@ 1859 PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view 1860 1861 Input Parameters: 1862 + sfA - The first PetscSF 1863 - sfB - The second PetscSF 1864 1865 Output Parameters: 1866 . sfBA - The composite SF 1867 1868 Level: developer 1869 1870 Notes: 1871 Currently, the two SFs must be defined on congruent communicators and they must be true star 1872 forests, i.e. the same leaf is not connected with different roots. 1873 1874 sfA's leaf space and sfB's root space might be partially overlapped. The composition builds 1875 a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected 1876 nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a 1877 Bcast on sfA, then a Bcast on sfB, on connected nodes. 
.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  if (localPointsA) {
    /* sfA's leaves are non-contiguous: scatter its remote roots into sfB's root index space,
       with (rank,index) = (-1,-1) marking roots of sfB with no incoming edge from sfA */
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* this leaf of sfA is outside sfB's root space */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  /* Broadcast sfA's (rank,index) root info through sfB down to sfB's leaves */
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* denseB: every leaf of sfB is connected through to a root of sfA and sfB's leaves are contiguous,
     so the composite can keep contiguous leaf storage (ilocal == NULL) */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership of leafdataB passes to the new SF below */
  } else {
    /* Compact: keep only leaves that actually received a root of sfA */
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

   Input Parameters:
+  sfA - The first PetscSF
-  sfB - The second PetscSF

   Output Parameters:
.  sfBA - The composite SF.

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
   roots are not in sfBA.
Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
   a Reduce on sfB, on connected roots.

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op            op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool         iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPIU_REPLACE. In that case, MPI_MAXLOC and MPIU_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr);
  if (iswin) op = MPIU_REPLACE;
#endif

  /* Spread sfA's remote roots over sfB's leaf index range; (rank,index) = (-1,-1) marks positions
     in that range which are not leaves of sfA */
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* localPointsA == NULL: sfA's leaves are the contiguous range 0..numLeavesA-1 */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Reduce sfA's (rank,index) pairs from sfB's leaves up to sfB's roots; roots that receive nothing keep (-1,-1) */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* Compact in place: keep only the connected roots of sfB as leaves of the composite */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

   Input Parameters:
.  sf - The global PetscSF

   Output Parameters:
.  out - The local PetscSF
 */
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm          comm;
  PetscMPIInt       myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt          i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode       *liremote;
  PetscSF           lsf;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->CreateLocalSF) {
    /* Let the implementation build the local SF if it provides a specialized routine */
    ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
  } else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr);

    /* Find out local edges and build a local SF */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);

    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(lsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
    *out = lsf;
  }
  PetscFunctionReturn(0);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* NOTE(review): the BcastAndOpBegin event is used for both the begin and end of this combined operation */
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  if (sf->ops->BcastToZero) {
    ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}