#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/hashseti.h>
#include <petscctable.h>

#if defined(PETSC_HAVE_CUDA)
#include <cuda_runtime.h>
#endif

#if defined(PETSC_HAVE_HIP)
#include <hip/hip_runtime.h>
#endif

#if defined(PETSC_USE_DEBUG)
# define PetscSFCheckGraphSet(sf,arg) do {                         \
    if (PetscUnlikely(!(sf)->graphset))                            \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
# define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif

const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};

PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *type)
{
  PetscFunctionBegin;
  PetscValidPointer(type,2);
  *type = PETSC_MEMTYPE_HOST;
#if defined(PETSC_HAVE_CUDA)
  if (PetscCUDAInitialized && data) {
    cudaError_t                  cerr;
    struct cudaPointerAttributes attr;
    enum cudaMemoryType          mtype;
    cerr = cudaPointerGetAttributes(&attr,data); /* Do not check error since before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */
    cudaGetLastError(); /* Reset the last error */
#if (CUDART_VERSION < 10000)
    mtype = attr.memoryType;
#else
    mtype = attr.type;
#endif
    if (cerr == cudaSuccess && mtype == cudaMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
  }
#endif

#if defined(PETSC_HAVE_HIP)
  if (PetscHIPInitialized && data) {
    hipError_t                   cerr;
    struct hipPointerAttribute_t attr;
    enum hipMemoryType           mtype;
    cerr = hipPointerGetAttributes(&attr,data); /* Do not check the error code since, as in the CUDA case above, passing a host pointer may return an error */
    hipGetLastError(); /* Reset the last error */
    mtype = attr.memoryType;
    if (cerr == hipSuccess && mtype == hipMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
  }
#endif
  PetscFunctionReturn(0);
}

/*@
  PetscSFCreate - create a star forest communication context

  Collective

  Input Arguments:
. comm - communicator on which the star forest will operate

  Output Arguments:
. sf - new star forest context

  Options Database Keys:
+ -sf_type basic    - Use MPI persistent Isend/Irecv for communication (Default)
. -sf_type window   - Use MPI-3 one-sided window for communication
- -sf_type neighbor - Use MPI-3 neighborhood collectives for communication

  Level: intermediate

  Notes:
  When one knows the communication graph is one of the predefined patterns, such as MPI_Alltoall, MPI_Allgatherv,
  MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
  SFs are optimized and have better performance than general SFs.
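
  Example Usage:
  A minimal sketch (added for illustration, not from the original manual page) in which every process contributes one
  root and one leaf, each leaf referencing root 0 on rank 0, and root data is broadcast to the leaves:
.vb
  PetscSF        sf;
  PetscSFNode    remote;
  PetscInt       rootval = 42,leafval = 0;
  PetscErrorCode ierr;

  ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr);
  remote.rank  = 0;   /* every leaf points at root 0 of rank 0 */
  remote.index = 0;
  ierr = PetscSFSetGraph(sf,1,1,NULL,PETSC_COPY_VALUES,&remote,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,&rootval,&leafval);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,&rootval,&leafval);CHKERRQ(ierr);   /* leafval is now 42 on every rank */
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
.ve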

.seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
{
  PetscErrorCode ierr;
  PetscSF        b;

  PetscFunctionBegin;
  PetscValidPointer(sf,2);
  ierr = PetscSFInitializePackage();CHKERRQ(ierr);

  ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);

  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_MAX_INT;
  b->maxleaf   = PETSC_MIN_INT;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;
#if defined(PETSC_HAVE_DEVICE)
  b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
  b->use_stream_aware_mpi = PETSC_FALSE;
  b->use_default_stream   = PETSC_TRUE; /* The assumption is true for PETSc internal use of SF */
#if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda */
  b->backend = PETSCSF_BACKEND_KOKKOS;
#elif defined(PETSC_HAVE_CUDA)
  b->backend = PETSCSF_BACKEND_CUDA;
#endif
#endif
  *sf = b;
  PetscFunctionReturn(0);
}

/*@
  PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

  Collective

  Input Arguments:
. sf - star forest

  Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);CHKERRQ(ierr);}
#endif
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@C
  PetscSFSetType - Set the PetscSF communication implementation

  Collective on PetscSF

  Input Parameters:
+ sf   - the PetscSF context
- type - a known method

  Options Database Key:
. -sf_type <type> - Sets the method; use -help for a list
   of available methods (for instance, window, basic, neighbor)

  Notes:
  See "include/petscsf.h" for available methods (for instance)
+ PETSCSFWINDOW - MPI-2/3 one-sided
- PETSCSFBASIC  - basic implementation using MPI-1 two-sided

  Level: intermediate

.seealso: PetscSFType, PetscSFCreate()
@*/
PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
{
  PetscErrorCode ierr,(*r)(PetscSF);
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidCharPointer(type,2);

  ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
  if (match) PetscFunctionReturn(0);

  ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
  if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
  /* Destroy the previous PetscSF implementation context */
  if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
  ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
  ierr = (*r)(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
  PetscSFGetType - Get the PetscSF communication implementation

  Not Collective

  Input Parameter:
. sf - the PetscSF context

  Output Parameter:
. type - the PetscSF type name

  Level: intermediate

.seealso: PetscSFSetType(), PetscSFCreate()
@*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(type,2);
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}

/*@
  PetscSFDestroy - destroy star forest

  Collective

  Input Arguments:
. sf - address of star forest

  Level: intermediate

.seealso: PetscSFCreate(), PetscSFReset()
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
{
  PetscInt          i,nleaves;
  PetscMPIInt       size;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
  ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  for (i = 0; i < nleaves; i++) {
    const PetscInt rank   = iremote[i].rank;
    const PetscInt remote = iremote[i].index;
    const PetscInt leaf   = ilocal ? ilocal[i] : i;
    if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
    if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
    if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
  }
  PetscFunctionReturn(0);
}

/*@
  PetscSFSetUp - set up communication structures

  Collective

  Input Arguments:
. sf - star forest communication object

  Level: beginner

.seealso: PetscSFSetFromOptions(), PetscSFSetType()
@*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (sf->setupcalled) PetscFunctionReturn(0);
  ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr);
  if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} /* Zero all sf->ops */
  if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
#if defined(PETSC_HAVE_CUDA)
  if (sf->backend == PETSCSF_BACKEND_CUDA) {
    sf->ops->Malloc = PetscSFMalloc_Cuda;
    sf->ops->Free   = PetscSFFree_Cuda;
  }
#endif

#if defined(PETSC_HAVE_KOKKOS)
  if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
    sf->ops->Malloc = PetscSFMalloc_Kokkos;
    sf->ops->Free   = PetscSFFree_Kokkos;
  }
#endif
  ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
  PetscSFSetFromOptions - set PetscSF options using the options database

  Logically Collective

  Input Arguments:
. sf - star forest

  Options Database Keys:
+ -sf_type                  - implementation type, see PetscSFSetType()
. -sf_rank_order            - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
. -sf_use_default_stream    - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also
                              use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
                              If true, this option only works with -use_gpu_aware_mpi 1.
. -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
                              If true, this option only works with -use_gpu_aware_mpi 1.
- -sf_backend cuda | kokkos - Select the device backend SF uses. Currently SF has two backends: cuda and kokkos.
                              On CUDA devices, one can choose cuda or kokkos with the default being cuda. On other devices,
                              the only available backend is kokkos.

  Level: intermediate
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  {
    char      backendstr[32] = {0};
    PetscBool isCuda = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
    /* Change the defaults set in PetscSFCreate() with command line options */
    ierr = PetscOptionsBool("-sf_use_default_stream","Assume SF's input and output root/leafdata is computed on the default stream","PetscSFSetFromOptions",sf->use_default_stream,&sf->use_default_stream,NULL);CHKERRQ(ierr);
    ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr);
    ierr = PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);CHKERRQ(ierr);
    ierr = PetscStrcasecmp("cuda",backendstr,&isCuda);CHKERRQ(ierr);
    ierr = PetscStrcasecmp("kokkos",backendstr,&isKokkos);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
    if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
    else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
    else if (set) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported on cuda devices. You may choose cuda or kokkos (if installed)", backendstr);
#elif defined(PETSC_HAVE_KOKKOS)
    if (set && !isKokkos) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr);
#endif
  }
#endif
  if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

  Logically Collective

  Input Arguments:
+ sf  - star forest
- flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

  Level: advanced

.seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}

/*@
  PetscSFSetGraph - Set a parallel star forest

  Collective

  Input Arguments:
+ sf        - star forest
. nroots    - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
. nleaves   - number of leaf vertices on the current process, each of these references a root on any process
. ilocal    - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
              during setup in debug mode)
. localmode - copy mode for ilocal
. iremote    - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
               during setup in debug mode)
- remotemode - copy mode for iremote

  Level: intermediate

  Notes:
  In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode

  Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
  encode contiguous storage. In that case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
  needed.

  Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
  indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).

.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
  if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    ierr = PetscSFReset(sf);CHKERRQ(ierr);
  }

  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  if (nleaves && ilocal) {
    PetscInt i;
    PetscInt minleaf = PETSC_MAX_INT;
    PetscInt maxleaf = PETSC_MIN_INT;
    int      contiguous = 1;
    for (i=0; i<nleaves; i++) {
      minleaf = PetscMin(minleaf,ilocal[i]);
      maxleaf = PetscMax(maxleaf,ilocal[i]);
      contiguous &= (ilocal[i] == i);
    }
    sf->minleaf = minleaf;
    sf->maxleaf = maxleaf;
    if (contiguous) {
      if (localmode == PETSC_OWN_POINTER) {
        ierr = PetscFree(ilocal);CHKERRQ(ierr);
      }
      ilocal = NULL;
    }
  } else {
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
  }

  if (ilocal) {
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
      sf->mine = sf->mine_alloc;
      break;
    case PETSC_OWN_POINTER:
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      sf->mine_alloc = NULL;
      sf->mine       = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }

  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    sf->remote_alloc = NULL;
    sf->remote       = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
  PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

  Collective

  Input Parameters:
+ sf      - The PetscSF
. map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
- pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

  Notes:
  It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x whose layout is map, and
  n and N are the local and global sizes of x respectively.

  With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph such that, if one does a Bcast on it, it copies x to
  sequential vectors y on all ranks.

  With PETSCSF_PATTERN_GATHER, the routine creates a graph such that, if one does a Bcast on it, it copies x to a
  sequential vector y on rank 0.

  In the above cases, entries of x are roots and entries of y are leaves.

  With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is the size of sf's communicator. The routine
  creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
  of rank j, for 0 <= i,j < NP. It is a kind of MPI_Alltoall with sendcount/recvcount equal to 1. Note that this does
  not mean one cannot send multiple items. One just needs to create a new MPI datatype for the multiple data
  items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

  In this case, roots and leaves are symmetric.

  Level: intermediate
@*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n is the same over all ranks so that we can optimize it */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
  */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots  = n;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    sf->nleaves = rank ? 0 : N;
    sf->nroots  = n;
    sf->nranks  = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves = size;
    sf->nroots  = size;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
  PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

  Collective

  Input Arguments:
. sf - star forest to invert

  Output Arguments:
. isf - inverse of sf

  Level: advanced

  Notes:
  All roots must have degree 1.

  The local space may be a permutation, but cannot be sparse.

.seealso: PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  for (i=0; i<nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL;
  else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

  Collective

  Input Arguments:
+ sf  - communication object to duplicate
- opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

  Output Arguments:
. newsf - new communication object

  Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
#if defined(PETSC_HAVE_DEVICE)
  (*newsf)->backend              = sf->backend;
  (*newsf)->use_default_stream   = sf->use_default_stream;
  (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  /* Don't do PetscSFSetUp() since the new sf's graph might not have been set. */
  PetscFunctionReturn(0);
}

/*@C
  PetscSFGetGraph - Get the graph specifying a parallel star forest

  Not Collective

  Input Arguments:
. sf - star forest

  Output Arguments:
+ nroots  - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
. nleaves - number of leaf vertices on the current process, each of these references a root on any process
. ilocal  - locations of leaves in leafdata buffers
- iremote - remote locations of root vertices for each leaf on the current process

  Notes:
  We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet

  When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you
  want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.

  Level: intermediate

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->GetGraph) {
    ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
  } else {
    if (nroots)  *nroots  = sf->nroots;
    if (nleaves) *nleaves = sf->nleaves;
    if (ilocal)  *ilocal  = sf->mine;
    if (iremote) *iremote = sf->remote;
  }
  PetscFunctionReturn(0);
}

/*@
  PetscSFGetLeafRange - Get the active leaf ranges

  Not Collective

  Input Arguments:
. sf - star forest

  Output Arguments:
+ minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
- maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.
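
  Example Usage:
  A small sketch (added for illustration, not from the original manual page; sf is a PetscSF whose graph has been set)
  of sizing a local work array from the active leaf range, as several SF routines do internally:
.vb
  PetscInt    minleaf,maxleaf,nbuf;
  PetscScalar *work;

  ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
  nbuf = maxleaf - minleaf + 1;             /* covers all active leaves; 0 when there are none */
  ierr = PetscMalloc1(nbuf,&work);CHKERRQ(ierr);
  /* ... index work[] with (leaf - minleaf) ... */
  ierr = PetscFree(work);CHKERRQ(ierr);
.ve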

  Level: developer

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}

/*@C
  PetscSFViewFromOptions - View from Options

  Collective on PetscSF

  Input Parameters:
+ A    - the star forest
. obj  - Optional object
- name - command line option

  Level: intermediate

.seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
  ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
  PetscSFView - view a star forest

  Collective

  Input Arguments:
+ sf     - star forest
- viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

  Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
  PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

  Not Collective

  Input Arguments:
. sf - star forest

  Output Arguments:
+ nranks  - number of ranks referenced by local part
. ranks   - array of ranks
. roffset - offset in rmine/rremote for each rank (length nranks+1)
. rmine   - concatenated array holding local indices referencing each remote rank
- rremote - concatenated array holding remote indices referenced for each remote rank

  Level: developer

.seealso: PetscSFGetLeafRanks()
@*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetRootRanks) {
    ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
  } else {
    /* The generic implementation */
    if (nranks)  *nranks  = sf->nranks;
    if (ranks)   *ranks   = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine)   *rmine   = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  }
  PetscFunctionReturn(0);
}

/*@C
  PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

  Not Collective

  Input Arguments:
. sf - star forest

  Output Arguments:
+ niranks  - number of leaf ranks referencing roots on this process
. iranks   - array of ranks
. ioffset  - offset in irootloc for each rank (length niranks+1)
- irootloc - concatenated array holding local indices of roots referenced by each leaf rank

  Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetLeafRanks) {
    ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
  } else {
    PetscSFType type;
    ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
  }
  PetscFunctionReturn(0);
}

static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list)
{
  PetscInt i;
  for (i=0; i<n; i++) {
    if (needle == list[i]) return PETSC_TRUE;
  }
  return PETSC_FALSE;
}

/*@C
  PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

  Collective

  Input Arguments:
+ sf     - PetscSF to set up; PetscSFSetGraph() must have been called
- dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

  Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--; /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group   group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;
  }
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
  PetscSFGetGroups - gets incoming and outgoing process groups

  Collective

  Input Argument:
. sf - star forest

  Output Arguments:
+ incoming - group of origin processes for incoming edges (leaves that reference my roots)
- outgoing - group of destination processes for outgoing edges (roots that I reference)

  Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
    /* Enumerate the incoming ranks */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}

/*@
  PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

  Collective

  Input Argument:
. sf - star forest that may contain roots with 0 or with more than 1 vertex

  Output Arguments:
. multi - star forest with split roots, such that each root has degree exactly 1

  Level: developer

  Notes:
  In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
  directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
  edge, it is a candidate for future optimization that might involve its removal.
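
  Example Usage:
  A rough sketch (added for illustration, not from the original manual page; assumes contiguous leaf storage, i.e.
  ilocal == NULL) of gathering one value per leaf into the concatenated multi-root space, which is the usual way the
  multi-SF is exercised indirectly:
.vb
  const PetscInt *degree;
  PetscInt       i,nroots,nleaves,nmulti = 0;
  PetscScalar    *leafdata,*multirootdata;

  ierr = PetscSFComputeDegreeBegin(sf,&degree);CHKERRQ(ierr);
  ierr = PetscSFComputeDegreeEnd(sf,&degree);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,NULL,NULL);CHKERRQ(ierr);
  for (i=0; i<nroots; i++) nmulti += degree[i];   /* one multi-root slot per incoming leaf */
  ierr = PetscMalloc2(nleaves,&leafdata,nmulti,&multirootdata);CHKERRQ(ierr);
  for (i=0; i<nleaves; i++) leafdata[i] = (PetscScalar)i;
  ierr = PetscSFGatherBegin(sf,MPIU_SCALAR,leafdata,multirootdata);CHKERRQ(ierr);
  ierr = PetscSFGatherEnd(sf,MPIU_SCALAR,leafdata,multirootdata);CHKERRQ(ierr);
  ierr = PetscFree2(leafdata,multirootdata);CHKERRQ(ierr);
.ve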

.seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    *multi = sf->multi;
    PetscFunctionReturn(0);
  }
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
      for (i=0; i<sf->nroots; i++) {
        if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
      }
    }
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) { /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
    }
    ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}

/*@C
  PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices

  Collective

  Input Arguments:
+ sf        - original star forest
. nselected - number of selected roots on this process
- selected  - indices of the selected roots on this process

  Output Arguments:
. esf - new star forest

  Level: advanced

  Note:
  To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
  be done by calling PetscSFGetGraph().
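
  Example Usage:
  A brief sketch (added for illustration, not from the original manual page; assumes this process has at least three
  roots, and rootdata/leafdata are arrays sized as for PetscSFBcastBegin()) that keeps only the edges pointing at two
  selected roots and then communicates on the embedded SF:
.vb
  PetscSF        esf;
  const PetscInt selected[] = {0,2};

  ierr = PetscSFCreateEmbeddedSF(sf,2,selected,&esf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(esf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);  /* only leaves attached to roots 0 and 2 are updated */
  ierr = PetscSFBcastEnd(esf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&esf);CHKERRQ(ierr);
.ve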

.seealso: PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
{
  PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
  const PetscInt    *ilocal;
  signed char       *rootdata,*leafdata,*leafmem;
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  MPI_Comm          comm;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (nselected) PetscValidPointer(selected,3);
  PetscValidPointer(esf,4);

  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);

  if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
    PetscBool dups;
    ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr);
    if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
    for (i=0; i<nselected; i++)
      if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root index %D is out of [0,%D)",selected[i],nroots);
  }

  if (sf->ops->CreateEmbeddedSF) {
    ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,selected,esf);CHKERRQ(ierr);
  } else {
    /* A generic version of creating embedded sf */
    ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
    maxlocal = maxleaf - minleaf + 1;
    ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr);
    leafdata = leafmem - minleaf;
    /* Tag selected roots and bcast to leaves */
    for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
    ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr);

    /* Build esf with leaves that are still connected */
    esf_nleaves = 0;
    for (i=0; i<nleaves; i++) {
      j = ilocal ? ilocal[i] : i;
      /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
         with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
      */
      esf_nleaves += (leafdata[j] ? 1 : 0);
    }
    ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr);
    for (i=n=0; i<nleaves; i++) {
      j = ilocal ? ilocal[i] : i;
      if (leafdata[j]) {
        new_ilocal[n]        = j;
        new_iremote[n].rank  = iremote[i].rank;
        new_iremote[n].index = iremote[i].index;
        ++n;
      }
    }
    ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
  PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices

  Collective

  Input Arguments:
+ sf        - original star forest
. nselected - number of selected leaves on this process
- selected  - indices of the selected leaves on this process

  Output Arguments:
. newsf - new star forest

  Level: advanced

.seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  const PetscInt    *ilocal;
  PetscInt          i,nroots,*leaves,*new_ilocal;
  MPI_Comm          comm;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (nselected) PetscValidPointer(selected,3);
  PetscValidPointer(newsf,4);

  /* Uniq selected[] and put results in leaves[] */
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr);
  ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr);
  ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr);
  if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);

  /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
  if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
    ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr);
  } else {
    ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
    ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
    for (i=0; i<nselected; ++i) {
      const PetscInt l     = leaves[i];
      new_ilocal[i]        = ilocal ? ilocal[l] : l;
      new_iremote[i].rank  = iremote[l].rank;
      new_iremote[i].index = iremote[l].index;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  }
  ierr = PetscFree(leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
  PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd()

  Collective on PetscSF

  Input Arguments:
+ sf       - star forest on which to communicate
. unit     - data type associated with each node
. rootdata - buffer to broadcast
- op       - operation to use for reduction

  Output Arguments:
. leafdata - buffer to be reduced with values from each leaf's respective root

  Level: intermediate

  Notes:
  When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
  are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
  use PetscSFBcastAndOpWithMemTypeBegin() instead.
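
  Example Usage:
  A brief sketch (added for illustration, not from the original manual page; rootdata/leafdata are arrays sized for
  this SF's roots and leaves) that adds each root's value into all of its leaves, overlapping the communication with
  local work:
.vb
  ierr = PetscSFBcastAndOpBegin(sf,MPIU_SCALAR,rootdata,leafdata,MPIU_SUM);CHKERRQ(ierr);
  /* ... local work that does not touch rootdata or leafdata ... */
  ierr = PetscSFBcastAndOpEnd(sf,MPIU_SCALAR,rootdata,leafdata,MPIU_SUM);CHKERRQ(ierr);
.ve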

.seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd(), PetscSFBcastAndOpWithMemTypeBegin()
@*/
PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
  PetscSFBcastAndOpWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastAndOpEnd()

  Collective on PetscSF

  Input Arguments:
+ sf        - star forest on which to communicate
. unit      - data type associated with each node
. rootmtype - memory type of rootdata
. rootdata  - buffer to broadcast
. leafmtype - memory type of leafdata
- op        - operation to use for reduction

  Output Arguments:
. leafdata - buffer to be reduced with values from each leaf's respective root

  Level: intermediate

.seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd(), PetscSFBcastAndOpBegin()
@*/
PetscErrorCode PetscSFBcastAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
  PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()

  Collective

  Input Arguments:
+ sf       - star forest
. unit     - data type
. rootdata - buffer to broadcast
- op       - operation to use for reduction

  Output Arguments:
. leafdata - buffer to be reduced with values from each leaf's respective root

  Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFReduceEnd()
@*/
PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
  PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()

  Collective

  Input Arguments:
+ sf       - star forest
. unit     - data type
leafdata - values to reduce 1503 - op - reduction operation 1504 1505 Output Arguments: 1506 . rootdata - result of reduction of values from all leaves of each root 1507 1508 Level: intermediate 1509 1510 Notes: 1511 When PETSc is configured with device support, it uses its own mechanism to determine whether the given data pointers 1512 are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, 1513 use PetscSFReduceWithMemTypeBegin() instead. 1514 1515 .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin() 1516 @*/ 1517 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1518 { 1519 PetscErrorCode ierr; 1520 PetscMemType rootmtype,leafmtype; 1521 1522 PetscFunctionBegin; 1523 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1524 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1525 ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1526 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1527 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1528 ierr = (*sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr); 1529 ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1530 PetscFunctionReturn(0); 1531 } 1532 1533 /*@C 1534 PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd() 1535 1536 Collective 1537 1538 Input Arguments: 1539 + sf - star forest 1540 . unit - data type 1541 . leafmtype - memory type of leafdata 1542 . leafdata - values to reduce 1543 . rootmtype - memory type of rootdata 1544 - op - reduction operation 1545 1546 Output Arguments: 1547 . rootdata - result of reduction of values from all leaves of each root 1548 1549 Level: intermediate 1550 1551 .seealso: PetscSFBcastBegin(), PetscSFReduceBegin() 1552 @*/ 1553 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 1554 { 1555 PetscErrorCode ierr; 1556 1557 PetscFunctionBegin; 1558 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1559 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1560 ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1561 ierr = (*sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr); 1562 ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1563 PetscFunctionReturn(0); 1564 } 1565 1566 /*@C 1567 PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin() 1568 1569 Collective 1570 1571 Input Arguments: 1572 + sf - star forest 1573 . unit - data type 1574 . leafdata - values to reduce 1575 - op - reduction operation 1576 1577 Output Arguments: 1578 .
rootdata - result of reduction of values from all leaves of each root 1579 1580 Level: intermediate 1581 1582 .seealso: PetscSFSetGraph(), PetscSFBcastEnd() 1583 @*/ 1584 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1585 { 1586 PetscErrorCode ierr; 1587 1588 PetscFunctionBegin; 1589 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1590 ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr); 1591 ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr); 1592 ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr); 1593 PetscFunctionReturn(0); 1594 } 1595 1596 /*@C 1597 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd() 1598 1599 Collective 1600 1601 Input Arguments: 1602 + sf - star forest 1603 . unit - data type 1604 . leafdata - leaf values to use in reduction 1605 - op - operation to use for reduction 1606 1607 Output Arguments: 1608 + rootdata - root values to be updated, input state is seen by first process to perform an update 1609 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1610 1611 Level: advanced 1612 1613 Note: 1614 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1615 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1616 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1617 integers. 1618 1619 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph() 1620 @*/ 1621 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1622 { 1623 PetscErrorCode ierr; 1624 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1625 1626 PetscFunctionBegin; 1627 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1628 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1629 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1630 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1631 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1632 ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr); 1633 if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1634 ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr); 1635 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1636 PetscFunctionReturn(0); 1637 } 1638 1639 /*@C 1640 PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value 1641 1642 Collective 1643 1644 Input Arguments: 1645 + sf - star forest 1646 . unit - data type 1647 . 
leafdata - leaf values to use in reduction 1648 - op - operation to use for reduction 1649 1650 Output Arguments: 1651 + rootdata - root values to be updated, input state is seen by first process to perform an update 1652 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1653 1654 Level: advanced 1655 1656 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph() 1657 @*/ 1658 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1659 { 1660 PetscErrorCode ierr; 1661 1662 PetscFunctionBegin; 1663 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1664 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1665 ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr); 1666 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1667 PetscFunctionReturn(0); 1668 } 1669 1670 /*@C 1671 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd() 1672 1673 Collective 1674 1675 Input Arguments: 1676 . sf - star forest 1677 1678 Output Arguments: 1679 . degree - degree of each root vertex 1680 1681 Level: advanced 1682 1683 Notes: 1684 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 1685 1686 .seealso: PetscSFGatherBegin() 1687 @*/ 1688 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree) 1689 { 1690 PetscErrorCode ierr; 1691 1692 PetscFunctionBegin; 1693 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1694 PetscSFCheckGraphSet(sf,1); 1695 PetscValidPointer(degree,2); 1696 if (!sf->degreeknown) { 1697 PetscInt i, nroots = sf->nroots, maxlocal; 1698 if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1699 maxlocal = sf->maxleaf-sf->minleaf+1; 1700 ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr); 1701 ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1702 for (i=0; i<nroots; i++) sf->degree[i] = 0; 1703 for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1; 1704 ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr); 1705 } 1706 *degree = NULL; 1707 PetscFunctionReturn(0); 1708 } 1709 1710 /*@C 1711 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin() 1712 1713 Collective 1714 1715 Input Arguments: 1716 . sf - star forest 1717 1718 Output Arguments: 1719 . degree - degree of each root vertex 1720 1721 Level: developer 1722 1723 Notes: 1724 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 
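
   Example:
   A minimal sketch of the required begin/end pairing (the degree pointer is only valid after the End call; the array is owned by sf):
     const PetscInt *degree;
     ierr = PetscSFComputeDegreeBegin(sf,&degree);CHKERRQ(ierr);
     ierr = PetscSFComputeDegreeEnd(sf,&degree);CHKERRQ(ierr);
     /* degree[i] now holds the number of leaves attached to root i; do not PetscFree() it */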
1725 1726 .seealso: 1727 @*/ 1728 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree) 1729 { 1730 PetscErrorCode ierr; 1731 1732 PetscFunctionBegin; 1733 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1734 PetscSFCheckGraphSet(sf,1); 1735 PetscValidPointer(degree,2); 1736 if (!sf->degreeknown) { 1737 if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 1738 ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr); 1739 ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr); 1740 sf->degreeknown = PETSC_TRUE; 1741 } 1742 *degree = sf->degree; 1743 PetscFunctionReturn(0); 1744 } 1745 1746 1747 /*@C 1748 PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()). 1749 Each multi-root is assigned index of the corresponding original root. 1750 1751 Collective 1752 1753 Input Arguments: 1754 + sf - star forest 1755 - degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd() 1756 1757 Output Arguments: 1758 + nMultiRoots - (optional) number of multi-roots (roots of multi-SF) 1759 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots 1760 1761 Level: developer 1762 1763 Notes: 1764 The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed. 1765 1766 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF() 1767 @*/ 1768 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1769 { 1770 PetscSF msf; 1771 PetscInt i, j, k, nroots, nmroots; 1772 PetscErrorCode ierr; 1773 1774 PetscFunctionBegin; 1775 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1776 ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 1777 if (nroots) PetscValidIntPointer(degree,2); 1778 if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3); 1779 PetscValidPointer(multiRootsOrigNumbering,4); 1780 ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr); 1781 ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr); 1782 ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr); 1783 for (i=0,j=0,k=0; i<nroots; i++) { 1784 if (!degree[i]) continue; 1785 for (j=0; j<degree[i]; j++,k++) { 1786 (*multiRootsOrigNumbering)[k] = i; 1787 } 1788 } 1789 if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail"); 1790 if (nMultiRoots) *nMultiRoots = nmroots; 1791 PetscFunctionReturn(0); 1792 } 1793 1794 /*@C 1795 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 1796 1797 Collective 1798 1799 Input Arguments: 1800 + sf - star forest 1801 . unit - data type 1802 - leafdata - leaf data to gather to roots 1803 1804 Output Argument: 1805 . 
multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1806 1807 Level: intermediate 1808 1809 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1810 @*/ 1811 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1812 { 1813 PetscErrorCode ierr; 1814 PetscSF multi = NULL; 1815 1816 PetscFunctionBegin; 1817 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1818 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1819 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1820 ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 1821 PetscFunctionReturn(0); 1822 } 1823 1824 /*@C 1825 PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin() 1826 1827 Collective 1828 1829 Input Arguments: 1830 + sf - star forest 1831 . unit - data type 1832 - leafdata - leaf data to gather to roots 1833 1834 Output Argument: 1835 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1836 1837 Level: intermediate 1838 1839 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1840 @*/ 1841 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1842 { 1843 PetscErrorCode ierr; 1844 PetscSF multi = NULL; 1845 1846 PetscFunctionBegin; 1847 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1848 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1849 ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 1850 PetscFunctionReturn(0); 1851 } 1852 1853 /*@C 1854 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd() 1855 1856 Collective 1857 1858 Input Arguments: 1859 + sf - star forest 1860 . unit - data type 1861 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1862 1863 Output Argument: 1864 . leafdata - leaf data to be updated with personal data from each respective root 1865 1866 Level: intermediate 1867 1868 .seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin() 1869 @*/ 1870 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1871 { 1872 PetscErrorCode ierr; 1873 PetscSF multi = NULL; 1874 1875 PetscFunctionBegin; 1876 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1877 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1878 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1879 ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr); 1880 PetscFunctionReturn(0); 1881 } 1882 1883 /*@C 1884 PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin() 1885 1886 Collective 1887 1888 Input Arguments: 1889 + sf - star forest 1890 . unit - data type 1891 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1892 1893 Output Argument: 1894 .
leafdata - leaf data to be updated with personal data from each respective root 1895 1896 Level: intermediate 1897 1898 .seealso: PetscSFComputeDegreeEnd(), PetscSFGatherEnd() 1899 @*/ 1900 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1901 { 1902 PetscErrorCode ierr; 1903 PetscSF multi = NULL; 1904 1905 PetscFunctionBegin; 1906 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1907 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1908 ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr); 1909 PetscFunctionReturn(0); 1910 } 1911 1912 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 1913 { 1914 PetscInt i, n, nleaves; 1915 const PetscInt *ilocal = NULL; 1916 PetscHSetI seen; 1917 PetscErrorCode ierr; 1918 1919 PetscFunctionBegin; 1920 if (PetscDefined(USE_DEBUG)) { 1921 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 1922 ierr = PetscHSetICreate(&seen);CHKERRQ(ierr); 1923 for (i = 0; i < nleaves; i++) { 1924 const PetscInt leaf = ilocal ? ilocal[i] : i; 1925 ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr); 1926 } 1927 ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr); 1928 if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique"); 1929 ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr); 1930 } 1931 PetscFunctionReturn(0); 1932 } 1933 1934 /*@ 1935 PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view 1936 1937 Input Parameters: 1938 + sfA - The first PetscSF 1939 - sfB - The second PetscSF 1940 1941 Output Parameters: 1942 . sfBA - The composite SF 1943 1944 Level: developer 1945 1946 Notes: 1947 Currently, the two SFs must be defined on congruent communicators and they must be true star 1948 forests, i.e. the same leaf is not connected with different roots. 1949 1950 sfA's leaf space and sfB's root space may partially overlap. The composition builds 1951 a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected 1952 nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a 1953 Bcast on sfA, then a Bcast on sfB, on connected nodes.
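
   Example:
   A minimal sketch (sfA and sfB are assumed to be existing SFs with their graphs already set on the same communicator):
     PetscSF sfBA;
     ierr = PetscSFCompose(sfA,sfB,&sfBA);CHKERRQ(ierr);
     /* a Bcast on sfBA moves root data of sfA to the leaves of sfB along connected paths */
     ierr = PetscSFDestroy(&sfBA);CHKERRQ(ierr);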
1954 1955 .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph() 1956 @*/ 1957 PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA) 1958 { 1959 PetscErrorCode ierr; 1960 const PetscSFNode *remotePointsA,*remotePointsB; 1961 PetscSFNode *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB; 1962 const PetscInt *localPointsA,*localPointsB; 1963 PetscInt *localPointsBA; 1964 PetscInt i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA; 1965 PetscBool denseB; 1966 1967 PetscFunctionBegin; 1968 PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1); 1969 PetscSFCheckGraphSet(sfA,1); 1970 PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2); 1971 PetscSFCheckGraphSet(sfB,2); 1972 PetscCheckSameComm(sfA,1,sfB,2); 1973 PetscValidPointer(sfBA,3); 1974 ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr); 1975 ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr); 1976 1977 ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr); 1978 ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr); 1979 if (localPointsA) { 1980 ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr); 1981 for (i=0; i<numRootsB; i++) { 1982 reorderedRemotePointsA[i].rank = -1; 1983 reorderedRemotePointsA[i].index = -1; 1984 } 1985 for (i=0; i<numLeavesA; i++) { 1986 if (localPointsA[i] >= numRootsB) continue; 1987 reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i]; 1988 } 1989 remotePointsA = reorderedRemotePointsA; 1990 } 1991 ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr); 1992 ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr); 1993 ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr); 1994 ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr); 1995 ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr); 1996 1997 denseB = (PetscBool)!localPointsB; 1998 for (i=0,numLeavesBA=0; i<numLeavesB; i++) { 1999 if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE; 2000 else numLeavesBA++; 2001 } 2002 if (denseB) { 2003 localPointsBA = NULL; 2004 remotePointsBA = leafdataB; 2005 } else { 2006 ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr); 2007 ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr); 2008 for (i=0,numLeavesBA=0; i<numLeavesB; i++) { 2009 const PetscInt l = localPointsB ? localPointsB[i] : i; 2010 2011 if (leafdataB[l-minleaf].rank == -1) continue; 2012 remotePointsBA[numLeavesBA] = leafdataB[l-minleaf]; 2013 localPointsBA[numLeavesBA] = l; 2014 numLeavesBA++; 2015 } 2016 ierr = PetscFree(leafdataB);CHKERRQ(ierr); 2017 } 2018 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr); 2019 ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr); 2020 ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr); 2021 PetscFunctionReturn(0); 2022 } 2023 2024 /*@ 2025 PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one 2026 2027 Input Parameters: 2028 + sfA - The first PetscSF 2029 - sfB - The second PetscSF 2030 2031 Output Parameters: 2032 . sfBA - The composite SF. 2033 2034 Level: developer 2035 2036 Notes: 2037 Currently, the two SFs must be defined on congruent communicators and they must be true star 2038 forests, i.e. 
the same leaf is not connected with different roots. Moreover, all roots of the 2039 second SF must have a degree of 1, i.e., no root has more than one leaf connected. 2040 2041 sfA's leaf space and sfB's leaf space may partially overlap. The composition builds 2042 a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected 2043 roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then 2044 a Reduce on sfB, on connected roots. 2045 2046 .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF() 2047 @*/ 2048 PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA) 2049 { 2050 PetscErrorCode ierr; 2051 const PetscSFNode *remotePointsA,*remotePointsB; 2052 PetscSFNode *remotePointsBA; 2053 const PetscInt *localPointsA,*localPointsB; 2054 PetscSFNode *reorderedRemotePointsA = NULL; 2055 PetscInt i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA; 2056 MPI_Op op; 2057 #if defined(PETSC_USE_64BIT_INDICES) 2058 PetscBool iswin; 2059 #endif 2060 2061 PetscFunctionBegin; 2062 PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1); 2063 PetscSFCheckGraphSet(sfA,1); 2064 PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2); 2065 PetscSFCheckGraphSet(sfB,2); 2066 PetscCheckSameComm(sfA,1,sfB,2); 2067 PetscValidPointer(sfBA,3); 2068 ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr); 2069 ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr); 2070 2071 ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr); 2072 ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr); 2073 2074 /* TODO: Check that the roots of sfB have degree 1 */ 2075 /* Once that check is implemented, the MPI_MAXLOC below can be replaced 2076 with MPIU_REPLACE; in that case MPI_MAXLOC and MPIU_REPLACE have the same effect. 2077 MPI_MAXLOC is used only to get a deterministic output from this routine when 2078 the root condition is not met.
2079 */ 2080 op = MPI_MAXLOC; 2081 #if defined(PETSC_USE_64BIT_INDICES) 2082 /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */ 2083 ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr); 2084 if (iswin) op = MPIU_REPLACE; 2085 #endif 2086 2087 ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr); 2088 ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr); 2089 for (i=0; i<maxleaf - minleaf + 1; i++) { 2090 reorderedRemotePointsA[i].rank = -1; 2091 reorderedRemotePointsA[i].index = -1; 2092 } 2093 if (localPointsA) { 2094 for (i=0; i<numLeavesA; i++) { 2095 if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; 2096 reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i]; 2097 } 2098 } else { 2099 for (i=0; i<numLeavesA; i++) { 2100 if (i > maxleaf || i < minleaf) continue; 2101 reorderedRemotePointsA[i - minleaf] = remotePointsA[i]; 2102 } 2103 } 2104 2105 ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr); 2106 ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr); 2107 for (i=0; i<numRootsB; i++) { 2108 remotePointsBA[i].rank = -1; 2109 remotePointsBA[i].index = -1; 2110 } 2111 2112 ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr); 2113 ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr); 2114 ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr); 2115 for (i=0,numLeavesBA=0; i<numRootsB; i++) { 2116 if (remotePointsBA[i].rank == -1) continue; 2117 remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank; 2118 remotePointsBA[numLeavesBA].index = remotePointsBA[i].index; 2119 localPointsBA[numLeavesBA] = i; 2120 numLeavesBA++; 2121 } 2122 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr); 2123 ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr); 2124 ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr); 2125 PetscFunctionReturn(0); 2126 } 2127 2128 /* 2129 PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF 2130 2131 Input Parameters: 2132 . sf - The global PetscSF 2133 2134 Output Parameters: 2135 . 
out - The local PetscSF 2136 */ 2137 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out) 2138 { 2139 MPI_Comm comm; 2140 PetscMPIInt myrank; 2141 const PetscInt *ilocal; 2142 const PetscSFNode *iremote; 2143 PetscInt i,j,nroots,nleaves,lnleaves,*lilocal; 2144 PetscSFNode *liremote; 2145 PetscSF lsf; 2146 PetscErrorCode ierr; 2147 2148 PetscFunctionBegin; 2149 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 2150 if (sf->ops->CreateLocalSF) { 2151 ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr); 2152 } else { 2153 /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */ 2154 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 2155 ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr); 2156 2157 /* Find out local edges and build a local SF */ 2158 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 2159 for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;} 2160 ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr); 2161 ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr); 2162 2163 for (i=j=0; i<nleaves; i++) { 2164 if (iremote[i].rank == (PetscInt)myrank) { 2165 lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */ 2166 liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */ 2167 liremote[j].index = iremote[i].index; 2168 j++; 2169 } 2170 } 2171 ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr); 2172 ierr = PetscSFSetFromOptions(lsf);CHKERRQ(ierr); 2173 ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 2174 ierr = PetscSFSetUp(lsf);CHKERRQ(ierr); 2175 *out = lsf; 2176 } 2177 PetscFunctionReturn(0); 2178 } 2179 2180 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */ 2181 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata) 2182 { 2183 PetscErrorCode ierr; 2184 PetscMemType rootmtype,leafmtype; 2185 2186 PetscFunctionBegin; 2187 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 2188 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 2189 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 2190 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 2191 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 2192 if (sf->ops->BcastToZero) { 2193 ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr); 2194 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type"); 2195 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 2196 PetscFunctionReturn(0); 2197 } 2198 2199