#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/hashseti.h>
#include <petscctable.h>

#if defined(PETSC_HAVE_CUDA)
#include <cuda_runtime.h>
#endif

#if defined(PETSC_HAVE_HIP)
#include <hip/hip_runtime.h>
#endif

/* In debug builds, raise an error if sf's graph has not been set yet; compiles to a no-op otherwise */
#if defined(PETSC_USE_DEBUG)
# define PetscSFCheckGraphSet(sf,arg) do { \
    if (PetscUnlikely(!(sf)->graphset)) \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
# define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif

const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};

/* Classify a pointer as host or device memory. Defaults to host; only upgraded to device
   when a device runtime is compiled in, initialized, and positively identifies the pointer. */
PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *type)
{
  PetscFunctionBegin;
  PetscValidPointer(type,2);
  *type = PETSC_MEMTYPE_HOST;
#if defined(PETSC_HAVE_CUDA)
  if (PetscCUDAInitialized && data) {
    cudaError_t                  cerr;
    struct cudaPointerAttributes attr;
    enum cudaMemoryType          mtype;
    cerr = cudaPointerGetAttributes(&attr,data); /* Do not check error since before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */
    cudaGetLastError(); /* Reset the last error */
#if (CUDART_VERSION < 10000)
    mtype = attr.memoryType; /* this field was renamed to 'type' in CUDA 10 */
#else
    mtype = attr.type;
#endif
    if (cerr == cudaSuccess && mtype == cudaMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
  }
#endif

#if defined(PETSC_HAVE_HIP)
  if (PetscHIPInitialized && data) {
    hipError_t                   cerr;
    struct hipPointerAttribute_t attr;
    enum hipMemoryType           mtype;
    cerr = hipPointerGetAttributes(&attr,data); /* Do not check error since passing a host pointer may return hipErrorInvalidValue */
    hipGetLastError(); /* Reset the last error */
    mtype = attr.memoryType;
    if (cerr == hipSuccess && mtype == hipMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
  }
#endif
  PetscFunctionReturn(0);
}

/*@
   PetscSFCreate - create a star forest communication context

   Collective

   Input Arguments:
.  comm - communicator on which the star forest will operate

   Output Arguments:
.  sf - new star forest context

   Options Database Keys:
+  -sf_type basic     - Use MPI persistent Isend/Irecv for communication (Default)
.  -sf_type window    - Use MPI-3 one-sided window for communication
-  -sf_type neighbor  - Use MPI-3 neighborhood collectives for communication

   Level: intermediate

   Notes:
   When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
   MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
   SFs are optimized and they have better performance than general SFs.

.seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
{
  PetscErrorCode ierr;
  PetscSF        b;

  PetscFunctionBegin;
  PetscValidPointer(sf,2);
  ierr = PetscSFInitializePackage();CHKERRQ(ierr);

  ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);

  /* -1 marks "graph not set yet"; the leaf range starts empty (min > max) until PetscSFSetGraph() */
  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_MAX_INT;
  b->maxleaf   = PETSC_MIN_INT;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;
#if defined(PETSC_HAVE_DEVICE)
  b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
  b->use_stream_aware_mpi = PETSC_FALSE;
  b->use_default_stream   = PETSC_TRUE; /* The assumption is true for PETSc internal use of SF */
#if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda */
  b->backend = PETSCSF_BACKEND_KOKKOS;
#elif defined(PETSC_HAVE_CUDA)
  b->backend = PETSCSF_BACKEND_CUDA;
#endif
#endif

  *sf = b;
  PetscFunctionReturn(0);
}

/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Arguments:
.  sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the implementation release its own state before the generic fields are cleared */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);CHKERRQ(ierr);}
#endif
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
  /* The multi-SF and the layout are derived from the graph; they are rebuilt on demand */
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFSetType - Set the PetscSF communication implementation

   Collective on PetscSF

   Input Parameters:
+  sf - the PetscSF context
-  type - a known method

   Options Database Key:
-sf_type <type> - Sets the method; use -help for a list 172 of available methods (for instance, window, basic, neighbor) 173 174 Notes: 175 See "include/petscsf.h" for available methods (for instance) 176 + PETSCSFWINDOW - MPI-2/3 one-sided 177 - PETSCSFBASIC - basic implementation using MPI-1 two-sided 178 179 Level: intermediate 180 181 .seealso: PetscSFType, PetscSFCreate() 182 @*/ 183 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type) 184 { 185 PetscErrorCode ierr,(*r)(PetscSF); 186 PetscBool match; 187 188 PetscFunctionBegin; 189 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 190 PetscValidCharPointer(type,2); 191 192 ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr); 193 if (match) PetscFunctionReturn(0); 194 195 ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr); 196 if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type); 197 /* Destroy the previous PetscSF implementation context */ 198 if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);} 199 ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr); 200 ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr); 201 ierr = (*r)(sf);CHKERRQ(ierr); 202 PetscFunctionReturn(0); 203 } 204 205 /*@C 206 PetscSFGetType - Get the PetscSF communication implementation 207 208 Not Collective 209 210 Input Parameter: 211 . sf - the PetscSF context 212 213 Output Parameter: 214 . type - the PetscSF type name 215 216 Level: intermediate 217 218 .seealso: PetscSFSetType(), PetscSFCreate() 219 @*/ 220 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 221 { 222 PetscFunctionBegin; 223 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1); 224 PetscValidPointer(type,2); 225 *type = ((PetscObject)sf)->type_name; 226 PetscFunctionReturn(0); 227 } 228 229 /*@ 230 PetscSFDestroy - destroy star forest 231 232 Collective 233 234 Input Arguments: 235 . 
.  sf - address of star forest

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFReset()
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Only the last reference actually tears the object down */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  ierr = PetscSFDestroy(&(*sf)->vscat.lsf);CHKERRQ(ierr);
  /* A duplicated unit datatype was created only when bs > 1; free it symmetrically */
  if ((*sf)->vscat.bs > 1) {ierr = MPI_Type_free(&(*sf)->vscat.unit);CHKERRQ(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Debug-only sanity check of the graph: every remote rank must be in [0,size),
   every remote index and leaf location must be nonnegative. No-op in optimized builds. */
static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
{
  PetscInt          i, nleaves;
  PetscMPIInt       size;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
  ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  for (i = 0; i < nleaves; i++) {
    const PetscInt rank   = iremote[i].rank;
    const PetscInt remote = iremote[i].index;
    const PetscInt leaf   = ilocal ? ilocal[i] : i; /* NULL ilocal means contiguous leaves */
    if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
    if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
    if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetUp - set up communication structures

   Collective

   Input Arguments:
.  sf - star forest communication object

   Level: beginner

.seealso: PetscSFSetFromOptions(), PetscSFSetType()
@*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (sf->setupcalled) PetscFunctionReturn(0); /* idempotent */
  ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr);
  if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} /* Zero all sf->ops */
  if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
  /* Install device memory hooks after the type-specific setup so they are not clobbered */
#if defined(PETSC_HAVE_CUDA)
  if (sf->backend == PETSCSF_BACKEND_CUDA) {
    sf->ops->Malloc = PetscSFMalloc_Cuda;
    sf->ops->Free   = PetscSFFree_Cuda;
  }
#endif

#if defined(PETSC_HAVE_KOKKOS)
  if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
    sf->ops->Malloc = PetscSFMalloc_Kokkos;
    sf->ops->Free   = PetscSFFree_Kokkos;
  }
#endif
  ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetFromOptions - set PetscSF options using the options database

   Logically Collective

   Input Arguments:
.  sf - star forest

   Options Database Keys:
+  -sf_type - implementation type, see PetscSFSetType()
.  -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
.  -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also
                            use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
                            If true, this option only works with -use_gpu_aware_mpi 1.
.  -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
                               If true, this option only works with -use_gpu_aware_mpi 1.

-  -sf_backend cuda | kokkos -Select the device backend SF uses. Currently SF has two backends: cuda and Kokkos.
                              On CUDA devices, one can choose cuda or kokkos with the default being cuda. On other devices,
                              the only available is kokkos.

   Level: intermediate
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* PetscObjectOptionsBegin introduces PetscOptionsObject, used by the PetscOptionsXxx() calls below */
  ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  {
    char      backendstr[32] = {0};
    PetscBool isCuda = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
    /* Change the defaults set in PetscSFCreate() with command line options */
    ierr = PetscOptionsBool("-sf_use_default_stream","Assume SF's input and output root/leafdata is computed on the default stream","PetscSFSetFromOptions",sf->use_default_stream,&sf->use_default_stream,NULL);CHKERRQ(ierr);
    ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr);
    ierr = PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);CHKERRQ(ierr);
    ierr = PetscStrcasecmp("cuda",backendstr,&isCuda);CHKERRQ(ierr);
    ierr = PetscStrcasecmp("kokkos",backendstr,&isKokkos);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
    if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
    else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
    else if (set) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported on cuda devices. You may choose cuda or kokkos (if installed)", backendstr);
#elif defined(PETSC_HAVE_KOKKOS)
    /* Without CUDA, kokkos is the only device backend; reject anything else the user typed */
    if (set && !isKokkos) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr);
#endif
  }
#endif
  if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

   Logically Collective

   Input Arguments:
+  sf - star forest
-  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

   Level: advanced

.seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  /* The multi-SF bakes in the ordering when it is built, so the choice must be made before then */
  if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Arguments:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
during setup in debug mode)
.  localmode - copy mode for ilocal
iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 422 during setup in debug mode) 423 - remotemode - copy mode for iremote 424 425 Level: intermediate 426 427 Notes: 428 In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode 429 430 Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they 431 encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not 432 needed 433 434 Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf 435 indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode). 436 437 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph() 438 @*/ 439 PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode) 440 { 441 PetscErrorCode ierr; 442 443 PetscFunctionBegin; 444 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 445 if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4); 446 if (nleaves > 0) PetscValidPointer(iremote,6); 447 if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots); 448 if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves); 449 450 if (sf->nroots >= 0) { /* Reset only if graph already set */ 451 ierr = PetscSFReset(sf);CHKERRQ(ierr); 452 } 453 454 ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 455 456 sf->nroots = nroots; 457 sf->nleaves = nleaves; 458 459 if (nleaves && ilocal) { 460 PetscInt i; 461 PetscInt minleaf = PETSC_MAX_INT; 462 PetscInt maxleaf = PETSC_MIN_INT; 463 int contiguous = 1; 464 for (i=0; i<nleaves; i++) { 465 minleaf = PetscMin(minleaf,ilocal[i]); 466 maxleaf = 
PetscMax(maxleaf,ilocal[i]); 467 contiguous &= (ilocal[i] == i); 468 } 469 sf->minleaf = minleaf; 470 sf->maxleaf = maxleaf; 471 if (contiguous) { 472 if (localmode == PETSC_OWN_POINTER) { 473 ierr = PetscFree(ilocal);CHKERRQ(ierr); 474 } 475 ilocal = NULL; 476 } 477 } else { 478 sf->minleaf = 0; 479 sf->maxleaf = nleaves - 1; 480 } 481 482 if (ilocal) { 483 switch (localmode) { 484 case PETSC_COPY_VALUES: 485 ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr); 486 ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr); 487 sf->mine = sf->mine_alloc; 488 break; 489 case PETSC_OWN_POINTER: 490 sf->mine_alloc = (PetscInt*)ilocal; 491 sf->mine = sf->mine_alloc; 492 break; 493 case PETSC_USE_POINTER: 494 sf->mine_alloc = NULL; 495 sf->mine = (PetscInt*)ilocal; 496 break; 497 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode"); 498 } 499 } 500 501 switch (remotemode) { 502 case PETSC_COPY_VALUES: 503 ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr); 504 ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr); 505 sf->remote = sf->remote_alloc; 506 break; 507 case PETSC_OWN_POINTER: 508 sf->remote_alloc = (PetscSFNode*)iremote; 509 sf->remote = sf->remote_alloc; 510 break; 511 case PETSC_USE_POINTER: 512 sf->remote_alloc = NULL; 513 sf->remote = (PetscSFNode*)iremote; 514 break; 515 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode"); 516 } 517 518 ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 519 sf->graphset = PETSC_TRUE; 520 PetscFunctionReturn(0); 521 } 522 523 /*@ 524 PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern 525 526 Collective 527 528 Input Parameters: 529 + sf - The PetscSF 530 . 
.  map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
-  pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

   Notes:
   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
   n and N are local and global sizes of x respectively.

   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
   sequential vectors y on all ranks.

   With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
   sequential vector y on rank 0.

   In above cases, entries of x are roots and entries of y are leaves.

   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
   creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
   of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
   not mean one can not send multiple items. One just needs to create a new MPI datatype for the multiple data
   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

   In this case, roots and leaves are symmetric.

   Level: intermediate
@*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* Build a square layout: size roots per rank, size*size globally */
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr   = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr   = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    /* One MPI_MAX reduction yields both max(n) (res[0]) and -min(n) (res[1]); equal magnitudes mean uniform n */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
  */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots  = n;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves in a gather */
    sf->nleaves = rank ? 0 : N;
    sf->nroots  = n;
    sf->nranks  = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves = size;
    sf->nroots  = size;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

   Collective

   Input Arguments:

.  sf - star forest to invert

   Output Arguments:
.  isf - inverse of sf
   Level: advanced

   Notes:
   All roots must have degree 1.

   The local space may be a permutation, but cannot be sparse.

.seealso: PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  /* Each leaf advertises its own (rank,index); reducing with REPLACE deposits that address at its root */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* Roots start as (-1,-1) so untouched ones can be recognized afterwards */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL;
  else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Arguments:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Arguments:
.  newsf - new communication object

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;
  MPI_Datatype   dtype=MPIU_SCALAR;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
  /* Since oldtype is committed, so is newtype, according to MPI */
  if (sf->vscat.bs > 1) {ierr = MPI_Type_dup(sf->vscat.unit,&dtype);CHKERRQ(ierr);}
  (*newsf)->vscat.bs     = sf->vscat.bs;
  (*newsf)->vscat.unit   = dtype;
  (*newsf)->vscat.to_n   = sf->vscat.to_n;
  (*newsf)->vscat.from_n = sf->vscat.from_n;
  /* Do not copy lsf. Build it on demand since it is rarely used */

#if defined(PETSC_HAVE_DEVICE)
  (*newsf)->backend              = sf->backend;
  (*newsf)->use_default_stream   = sf->use_default_stream;
  (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGraph - Get the graph specifying a parallel star forest

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers
-  iremote - remote locations of root vertices for each leaf on the current process

   Notes:
   We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet

   When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you
   want to update the graph, you must call PetscSFSetGraph after modifying the iremote array.
770 771 Level: intermediate 772 773 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph() 774 @*/ 775 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 776 { 777 PetscErrorCode ierr; 778 779 PetscFunctionBegin; 780 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 781 if (sf->ops->GetGraph) { 782 ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr); 783 } else { 784 if (nroots) *nroots = sf->nroots; 785 if (nleaves) *nleaves = sf->nleaves; 786 if (ilocal) *ilocal = sf->mine; 787 if (iremote) *iremote = sf->remote; 788 } 789 PetscFunctionReturn(0); 790 } 791 792 /*@ 793 PetscSFGetLeafRange - Get the active leaf ranges 794 795 Not Collective 796 797 Input Arguments: 798 . sf - star forest 799 800 Output Arguments: 801 + minleaf - minimum active leaf on this process. Return 0 if there are no leaves. 802 - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves. 803 804 Level: developer 805 806 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph() 807 @*/ 808 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf) 809 { 810 PetscFunctionBegin; 811 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 812 PetscSFCheckGraphSet(sf,1); 813 if (minleaf) *minleaf = sf->minleaf; 814 if (maxleaf) *maxleaf = sf->maxleaf; 815 PetscFunctionReturn(0); 816 } 817 818 /*@C 819 PetscSFViewFromOptions - View from Options 820 821 Collective on PetscSF 822 823 Input Parameters: 824 + A - the star forest 825 . 
. obj - Optional object
- name - command line option

  Level: intermediate
.seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
  ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFView - view a star forest

   Collective

   Input Arguments:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Make sure rank/offset tables exist before we try to print them */
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      /* Synchronized printing keeps per-rank output in rank order */
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        /* Sort a copy of the rank list (with a permutation) so edges print in rank order */
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
roffset - offset in rmine/rremote for each rank (length nranks+1) 924 . rmine - concatenated array holding local indices referencing each remote rank 925 - rremote - concatenated array holding remote indices referenced for each remote rank 926 927 Level: developer 928 929 .seealso: PetscSFGetLeafRanks() 930 @*/ 931 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote) 932 { 933 PetscErrorCode ierr; 934 935 PetscFunctionBegin; 936 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 937 if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 938 if (sf->ops->GetRootRanks) { 939 ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr); 940 } else { 941 /* The generic implementation */ 942 if (nranks) *nranks = sf->nranks; 943 if (ranks) *ranks = sf->ranks; 944 if (roffset) *roffset = sf->roffset; 945 if (rmine) *rmine = sf->rmine; 946 if (rremote) *rremote = sf->rremote; 947 } 948 PetscFunctionReturn(0); 949 } 950 951 /*@C 952 PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 953 954 Not Collective 955 956 Input Arguments: 957 . sf - star forest 958 959 Output Arguments: 960 + niranks - number of leaf ranks referencing roots on this process 961 . iranks - array of ranks 962 . 
ioffset - offset in irootloc for each rank (length niranks+1) 963 - irootloc - concatenated array holding local indices of roots referenced by each leaf rank 964 965 Level: developer 966 967 .seealso: PetscSFGetRootRanks() 968 @*/ 969 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 970 { 971 PetscErrorCode ierr; 972 973 PetscFunctionBegin; 974 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 975 if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 976 if (sf->ops->GetLeafRanks) { 977 ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr); 978 } else { 979 PetscSFType type; 980 ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 981 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 982 } 983 PetscFunctionReturn(0); 984 } 985 986 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) { 987 PetscInt i; 988 for (i=0; i<n; i++) { 989 if (needle == list[i]) return PETSC_TRUE; 990 } 991 return PETSC_FALSE; 992 } 993 994 /*@C 995 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations. 
   Collective

   Input Arguments:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  /* Count how many leaves reference each remote rank */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank (PetscTable cannot store key 0) */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--; /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group   group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate the distinguished group's members into ranks of sf's communicator */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition separately so distinguished ranks stay in front */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* Build CSR-style offsets; rcount[] is reset and reused as a per-rank fill cursor */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;
  }
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Argument:
.  sf - star forest

   Output Arguments:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  /* Groups are computed lazily and cached on the SF (freed in PetscSFDestroy) */
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks: build a helper SF with one leaf per
       referenced remote rank, all pointing at root 0 of that rank */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
    /* Enumerate the incoming ranks */
    ierr =
PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
    /* Gather every referencing rank's id onto root 0; inranks then lists the origins */
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* The outgoing group is simply the cached list of referenced root ranks */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Argument:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Arguments:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.

.seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    *multi = sf->multi;
    PetscFunctionReturn(0);
  }
  /* The multi-SF is computed lazily and cached on sf */
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    /* inoffset[] is the prefix sum of root degrees: the split-root index space */
    ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Each leaf atomically fetches (and bumps) the next free slot at its root */
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
      for (i=0; i<sf->nroots; i++) {
        if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
      }
    }
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) { /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      /* Deliver each leaf's owning rank to its split-root slot */
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell every leaf its rank-ordered slot, then rebuild the graph with it */
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
    }
    ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices

   Collective

   Input Arguments:
+  sf - original star forest
.  nselected  - number of selected roots on this process
-  selected   - indices of the selected roots on this process

   Output Arguments:
.  esf - new star forest

   Level: advanced

   Note:
   To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
   be done by calling PetscSFGetGraph().

.seealso: PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
{
  PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
  const PetscInt    *ilocal;
  signed char       *rootdata,*leafdata,*leafmem;
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  MPI_Comm          comm;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (nselected) PetscValidPointer(selected,3);
  PetscValidPointer(esf,4);

  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);

  if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of
range indices */ 1296 PetscBool dups; 1297 ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr); 1298 if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups"); 1299 for (i=0; i<nselected; i++) 1300 if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %D is out of [0,%D)",selected[i],nroots); 1301 } 1302 1303 if (sf->ops->CreateEmbeddedSF) { 1304 ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,selected,esf);CHKERRQ(ierr); 1305 } else { 1306 /* A generic version of creating embedded sf */ 1307 ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 1308 maxlocal = maxleaf - minleaf + 1; 1309 ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr); 1310 leafdata = leafmem - minleaf; 1311 /* Tag selected roots and bcast to leaves */ 1312 for (i=0; i<nselected; i++) rootdata[selected[i]] = 1; 1313 ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr); 1314 ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr); 1315 1316 /* Build esf with leaves that are still connected */ 1317 esf_nleaves = 0; 1318 for (i=0; i<nleaves; i++) { 1319 j = ilocal ? ilocal[i] : i; 1320 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1321 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1322 */ 1323 esf_nleaves += (leafdata[j] ? 1 : 0); 1324 } 1325 ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr); 1326 ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr); 1327 for (i=n=0; i<nleaves; i++) { 1328 j = ilocal ? 
ilocal[i] : i; 1329 if (leafdata[j]) { 1330 new_ilocal[n] = j; 1331 new_iremote[n].rank = iremote[i].rank; 1332 new_iremote[n].index = iremote[i].index; 1333 ++n; 1334 } 1335 } 1336 ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr); 1337 ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr); 1338 ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1339 ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr); 1340 } 1341 ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1342 PetscFunctionReturn(0); 1343 } 1344 1345 /*@C 1346 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1347 1348 Collective 1349 1350 Input Arguments: 1351 + sf - original star forest 1352 . nselected - number of selected leaves on this process 1353 - selected - indices of the selected leaves on this process 1354 1355 Output Arguments: 1356 . newsf - new star forest 1357 1358 Level: advanced 1359 1360 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph() 1361 @*/ 1362 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1363 { 1364 const PetscSFNode *iremote; 1365 PetscSFNode *new_iremote; 1366 const PetscInt *ilocal; 1367 PetscInt i,nroots,*leaves,*new_ilocal; 1368 MPI_Comm comm; 1369 PetscErrorCode ierr; 1370 1371 PetscFunctionBegin; 1372 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1373 PetscSFCheckGraphSet(sf,1); 1374 if (nselected) PetscValidPointer(selected,3); 1375 PetscValidPointer(newsf,4); 1376 1377 /* Uniq selected[] and put results in leaves[] */ 1378 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1379 ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr); 1380 ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr); 1381 ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr); 1382 if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) 
SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);

  /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
  if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
    ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr);
  } else {
    /* Generic path: copy out only the selected leaf edges */
    ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
    ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
    for (i=0; i<nselected; ++i) {
      const PetscInt l = leaves[i];
      new_ilocal[i]        = ilocal ? ilocal[l] : l;
      new_iremote[i].rank  = iremote[l].rank;
      new_iremote[i].index = iremote[l].index;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  }
  ierr = PetscFree(leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd()

   Collective on PetscSF

   Input Arguments:
+  sf - star forest on which to communicate
.  unit - data type associated with each node
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Arguments:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

   Notes:
   When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
   are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should
   use PetscSFBcastAndOpWithMemTypeBegin() instead.
.seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd(), PetscSFBcastAndOpWithMemTypeBegin()
@*/
PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* VecScatter logs its own events, so skip SF-level logging in that case */
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);}
  /* Detect host/device residency of the buffers before dispatching */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastAndOpWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastAndOpEnd()

   Collective on PetscSF

   Input Arguments:
+  sf - star forest on which to communicate
.  unit - data type associated with each node
.  rootmtype - memory type of rootdata
.  rootdata - buffer to broadcast
.  leafmtype - memory type of leafdata
-  op - operation to use for reduction

   Output Arguments:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd(),PetscSFBcastAndOpBegin()
@*/
PetscErrorCode PetscSFBcastAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);}
  /* Caller supplied the memory types, so the PetscGetMemType() probes are skipped */
  ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Arguments:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFReduceEnd()
@*/
PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);}
  ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Arguments:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

   Notes:
   When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
   are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should
   use PetscSFReduceWithMemTypeBegin() instead.

.seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
@*/
PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
  /* Detect host/device residency of the buffers before dispatching */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafmtype - memory type of leafdata
.  leafdata - values to reduce
.  rootmtype - memory type of rootdata
-  op - reduction operation

   Output Arguments:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
@*/
PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
  /* Caller supplied the memory types, so the PetscGetMemType() probes are skipped */
  ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Arguments:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);}
  ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Arguments:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
   integers.
.seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype,leafupdatemtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Detect host/device placement of each buffer so the implementation can choose the right path */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
  /* Only a single leaf memory type is passed to the implementation, so leafdata and leafupdate must be in the same space */
  if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
  ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.
leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Arguments:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* No PetscSFSetUp() here: the matching FetchAndOpBegin() already ran it */
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    /* A non-NULL sf->degree with degreeknown unset means a Begin without a matching End */
    if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf-sf->minleaf+1;
    ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* Reduce a 1 from every leaf into its root: the sums are the root degrees.
       degreetmp covers [minleaf,maxleaf], so shift the pointer to make it indexable by absolute leaf number. */
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  /* Result is not valid until PetscSFComputeDegreeEnd() completes the reduction */
  *degree = NULL;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
.seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    /* Same shifted pointer as in PetscSFComputeDegreeBegin(); the two calls must match */
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    /* Cache the result; later calls return sf->degree without recomputation */
    sf->degreeknown = PETSC_TRUE;
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}


/*@C
   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
   Each multi-root is assigned index of the corresponding original root.

   Collective

   Input Arguments:
+  sf - star forest
-  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

   Output Arguments:
+  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
-  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

   Level: developer

   Notes:
   The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
.seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
@*/
PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
{
  PetscSF        msf;
  PetscInt       i, j, k, nroots, nmroots;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
  /* degree may be NULL when there are no roots on this rank */
  if (nroots) PetscValidIntPointer(degree,2);
  if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
  PetscValidPointer(multiRootsOrigNumbering,4);
  ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr);
  /* Original root i owns degree[i] consecutive multi-roots; record i for each of them */
  for (i=0,j=0,k=0; i<nroots; i++) {
    if (!degree[i]) continue;
    for (j=0; j<degree[i]; j++,k++) {
      (*multiRootsOrigNumbering)[k] = i;
    }
  }
  /* The degrees must sum exactly to the number of multi-roots */
  if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
  if (nMultiRoots) *nMultiRoots = nmroots;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Argument:
.
multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  /* Gather is a reduce on the multi-SF with MPIU_REPLACE: each leaf has its own multi-root slot, so no combining occurs */
  ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Argument:
.  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.
leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  /* Scatter is a broadcast on the multi-SF: each leaf receives from its private multi-root slot */
  ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Verify (debug builds only) that no local leaf index appears twice in the SF's graph */
static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
{
  PetscInt        i, n, nleaves;
  const PetscInt *ilocal = NULL;
  PetscHSetI      seen;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) {
    ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
    ierr = PetscHSetICreate(&seen);CHKERRQ(ierr);
    for (i = 0; i < nleaves; i++) {
      const PetscInt leaf = ilocal ? ilocal[i] : i; /* ilocal==NULL means contiguous leaves 0..nleaves-1 */
      ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
    }
    /* If the set is smaller than the leaf count, some leaf was inserted twice */
    ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
    if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
    ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

   Input Parameters:
+  sfA - The first PetscSF
-  sfB - The second PetscSF

   Output Parameters:
.  sfBA - The composite SF

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e. the same leaf is not connected with different roots.

   sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
   Bcast on sfA, then a Bcast on sfB, on connected nodes.
.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  if (localPointsA) {
    /* Scatter sfA's remote points into an array indexed by sfB's root numbering;
       rank -1 marks sfB roots that no sfA leaf maps to */
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* sfA leaf outside sfB's root space: no path */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  /* Broadcast sfA's root locations through sfB, so each sfB leaf learns which sfA root reaches it */
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* denseB: sfB's leaves are contiguous and every one of them is connected */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    /* All leaves connected: reuse the broadcast buffer directly; ownership passes to sfBA below */
    localPointsBA  = NULL;
    remotePointsBA = leafdataB;
  } else {
    /* Compact connected leaves into freshly allocated arrays */
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

   Input Parameters:
+  sfA - The first PetscSF
-  sfB - The second PetscSF

   Output Parameters:
.  sfBA - The composite SF.

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
   roots are not in sfBA.
Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
   a Reduce on sfB, on connected roots.

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op            op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool         iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPIU_REPLACE. In that case, MPI_MAXLOC and MPIU_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr);
  if (iswin) op = MPIU_REPLACE;
#endif

  /* Lay sfA's remote points out over sfB's leaf range; rank -1 marks unconnected entries */
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; /* sfA leaf outside sfB's leaf range: no path */
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Reduce sfA's root locations from sfB's leaves into sfB's roots; roots that receive nothing keep rank -1 */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* Compact connected roots in place */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

   Input Parameters:
.  sf - The global PetscSF

   Output Parameters:
.  out - The local PetscSF
 */
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm          comm;
  PetscMPIInt       myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt          i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode       *liremote;
  PetscSF           lsf;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->CreateLocalSF) {
    ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
  } else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr);

    /* Find out local edges and build a local SF */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
    /* First pass: count edges whose remote root lives on this rank */
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);

    /* Second pass: copy the local edges, rewriting the rank to 0 for PETSC_COMM_SELF */
    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(lsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
    *out = lsf;
  }
  PetscFunctionReturn(0);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* NOTE(review): the whole operation is logged under PETSCSF_BcastAndOpBegin (Begin and End both) — confirm this is intentional */
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  if (sf->ops->BcastToZero) {
    ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}