#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/hashseti.h>
#include <petscctable.h>

#if defined(PETSC_HAVE_CUDA)
#include <cuda_runtime.h>
#endif

#if defined(PETSC_HAVE_HIP)
#include <hip/hip_runtime.h>
#endif

#if defined(PETSC_USE_DEBUG)
# define PetscSFCheckGraphSet(sf,arg) do {                              \
    if (PetscUnlikely(!(sf)->graphset))                                 \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
# define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif

const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};

PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *type)
{
  PetscFunctionBegin;
  PetscValidPointer(type,2);
  *type = PETSC_MEMTYPE_HOST;
#if defined(PETSC_HAVE_CUDA)
  if (PetscCUDAInitialized && data) {
    cudaError_t                  cerr;
    struct cudaPointerAttributes attr;
    enum cudaMemoryType          mtype;
    cerr = cudaPointerGetAttributes(&attr,data); /* Do not check the error code, since before CUDA 11.0 passing a host pointer returns cudaErrorInvalidValue */
    cudaGetLastError(); /* Reset the last error */
  #if (CUDART_VERSION < 10000)
    mtype = attr.memoryType;
  #else
    mtype = attr.type;
  #endif
    if (cerr == cudaSuccess && mtype == cudaMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
  }
#endif

#if defined(PETSC_HAVE_HIP)
  if (PetscHIPInitialized && data) {
    hipError_t                   cerr;
    struct hipPointerAttribute_t attr;
    enum hipMemoryType           mtype;
    cerr = hipPointerGetAttributes(&attr,data); /* As in the CUDA path above, do not check the error code */
    hipGetLastError(); /* Reset the last error */
    mtype = attr.memoryType;
    if (cerr == hipSuccess && mtype == hipMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
  }
#endif
  PetscFunctionReturn(0);
}

/*@
  PetscSFCreate - create a star forest communication context

  Collective

  Input Arguments:
. comm - communicator on which the star forest will operate

  Output Arguments:
. sf - new star forest context

  Options Database Keys:
+ -sf_type basic    - Use MPI persistent Isend/Irecv for communication (Default)
. -sf_type window   - Use MPI-3 one-sided window for communication
- -sf_type neighbor - Use MPI-3 neighborhood collectives for communication

  Level: intermediate

  Notes:
  When one knows the communication graph is one of the predefined patterns, such as MPI_Alltoall, MPI_Allgatherv or
  MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
  SFs are optimized and have better performance than general SFs.
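
  Example:
  A minimal usage sketch (illustrative only; error checking via ierr/CHKERRQ elided). Every process declares one root
  and attaches one leaf to root 0 of rank 0:
.vb
  PetscSF     sf;
  PetscSFNode remote;

  PetscSFCreate(PETSC_COMM_WORLD,&sf);
  PetscSFSetFromOptions(sf);
  remote.rank  = 0;
  remote.index = 0;
  PetscSFSetGraph(sf,1,1,NULL,PETSC_COPY_VALUES,&remote,PETSC_COPY_VALUES);
  PetscSFSetUp(sf);
  PetscSFDestroy(&sf);
.ve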
81 82 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy() 83 @*/ 84 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf) 85 { 86 PetscErrorCode ierr; 87 PetscSF b; 88 89 PetscFunctionBegin; 90 PetscValidPointer(sf,2); 91 ierr = PetscSFInitializePackage();CHKERRQ(ierr); 92 93 ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr); 94 95 b->nroots = -1; 96 b->nleaves = -1; 97 b->minleaf = PETSC_MAX_INT; 98 b->maxleaf = PETSC_MIN_INT; 99 b->nranks = -1; 100 b->rankorder = PETSC_TRUE; 101 b->ingroup = MPI_GROUP_NULL; 102 b->outgroup = MPI_GROUP_NULL; 103 b->graphset = PETSC_FALSE; 104 #if defined(PETSC_HAVE_DEVICE) 105 b->use_gpu_aware_mpi = use_gpu_aware_mpi; 106 b->use_stream_aware_mpi = PETSC_FALSE; 107 b->unknown_input_stream= PETSC_FALSE; 108 #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 109 b->backend = PETSCSF_BACKEND_KOKKOS; 110 #elif defined(PETSC_HAVE_CUDA) 111 b->backend = PETSCSF_BACKEND_CUDA; 112 #elif defined(PETSC_HAVE_HIP) 113 b->backend = PETSCSF_BACKEND_HIP; 114 #endif 115 116 #if defined(PETSC_HAVE_NVSHMEM) 117 b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */ 118 b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */ 119 ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL);CHKERRQ(ierr); 120 ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL);CHKERRQ(ierr); 121 #endif 122 #endif 123 b->vscat.from_n = -1; 124 b->vscat.to_n = -1; 125 b->vscat.unit = MPIU_SCALAR; 126 *sf = b; 127 PetscFunctionReturn(0); 128 } 129 130 /*@ 131 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 132 133 Collective 134 135 Input Arguments: 136 . sf - star forest 137 138 Level: advanced 139 140 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy() 141 @*/ 142 PetscErrorCode PetscSFReset(PetscSF sf) 143 { 144 PetscErrorCode ierr; 145 146 PetscFunctionBegin; 147 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 148 if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);} 149 sf->nroots = -1; 150 sf->nleaves = -1; 151 sf->minleaf = PETSC_MAX_INT; 152 sf->maxleaf = PETSC_MIN_INT; 153 sf->mine = NULL; 154 sf->remote = NULL; 155 sf->graphset = PETSC_FALSE; 156 ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr); 157 ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr); 158 sf->nranks = -1; 159 ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr); 160 sf->degreeknown = PETSC_FALSE; 161 ierr = PetscFree(sf->degree);CHKERRQ(ierr); 162 if (sf->ingroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRMPI(ierr);} 163 if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRMPI(ierr);} 164 if (sf->multi) sf->multi->multi = NULL; 165 ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr); 166 ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr); 167 168 #if defined(PETSC_HAVE_DEVICE) 169 for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);CHKERRQ(ierr);} 170 #endif 171 172 sf->setupcalled = PETSC_FALSE; 173 PetscFunctionReturn(0); 174 } 175 176 /*@C 177 PetscSFSetType - Set the PetscSF communication implementation 178 179 Collective on PetscSF 180 181 Input Parameters: 182 + sf - the PetscSF context 183 - type - a known method 184 185 Options Database Key: 186 . 
-sf_type <type> - Sets the method; use -help for a list 187 of available methods (for instance, window, basic, neighbor) 188 189 Notes: 190 See "include/petscsf.h" for available methods (for instance) 191 + PETSCSFWINDOW - MPI-2/3 one-sided 192 - PETSCSFBASIC - basic implementation using MPI-1 two-sided 193 194 Level: intermediate 195 196 .seealso: PetscSFType, PetscSFCreate() 197 @*/ 198 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type) 199 { 200 PetscErrorCode ierr,(*r)(PetscSF); 201 PetscBool match; 202 203 PetscFunctionBegin; 204 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 205 PetscValidCharPointer(type,2); 206 207 ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr); 208 if (match) PetscFunctionReturn(0); 209 210 ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr); 211 if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type); 212 /* Destroy the previous PetscSF implementation context */ 213 if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);} 214 ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr); 215 ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr); 216 ierr = (*r)(sf);CHKERRQ(ierr); 217 PetscFunctionReturn(0); 218 } 219 220 /*@C 221 PetscSFGetType - Get the PetscSF communication implementation 222 223 Not Collective 224 225 Input Parameter: 226 . sf - the PetscSF context 227 228 Output Parameter: 229 . type - the PetscSF type name 230 231 Level: intermediate 232 233 .seealso: PetscSFSetType(), PetscSFCreate() 234 @*/ 235 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 236 { 237 PetscFunctionBegin; 238 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1); 239 PetscValidPointer(type,2); 240 *type = ((PetscObject)sf)->type_name; 241 PetscFunctionReturn(0); 242 } 243 244 /*@C 245 PetscSFDestroy - destroy star forest 246 247 Collective 248 249 Input Arguments: 250 . sf - address of star forest 251 252 Level: intermediate 253 254 .seealso: PetscSFCreate(), PetscSFReset() 255 @*/ 256 PetscErrorCode PetscSFDestroy(PetscSF *sf) 257 { 258 PetscErrorCode ierr; 259 260 PetscFunctionBegin; 261 if (!*sf) PetscFunctionReturn(0); 262 PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1); 263 if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);} 264 ierr = PetscSFReset(*sf);CHKERRQ(ierr); 265 if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);} 266 ierr = PetscSFDestroy(&(*sf)->vscat.lsf);CHKERRQ(ierr); 267 if ((*sf)->vscat.bs > 1) {ierr = MPI_Type_free(&(*sf)->vscat.unit);CHKERRMPI(ierr);} 268 ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr); 269 PetscFunctionReturn(0); 270 } 271 272 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 273 { 274 PetscInt i, nleaves; 275 PetscMPIInt size; 276 const PetscInt *ilocal; 277 const PetscSFNode *iremote; 278 PetscErrorCode ierr; 279 280 PetscFunctionBegin; 281 if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0); 282 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 283 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr); 284 for (i = 0; i < nleaves; i++) { 285 const PetscInt rank = iremote[i].rank; 286 const PetscInt remote = iremote[i].index; 287 const PetscInt leaf = ilocal ? 
ilocal[i] : i; 288 if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size); 289 if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i); 290 if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i); 291 } 292 PetscFunctionReturn(0); 293 } 294 295 /*@ 296 PetscSFSetUp - set up communication structures 297 298 Collective 299 300 Input Arguments: 301 . sf - star forest communication object 302 303 Level: beginner 304 305 .seealso: PetscSFSetFromOptions(), PetscSFSetType() 306 @*/ 307 PetscErrorCode PetscSFSetUp(PetscSF sf) 308 { 309 PetscErrorCode ierr; 310 311 PetscFunctionBegin; 312 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 313 PetscSFCheckGraphSet(sf,1); 314 if (sf->setupcalled) PetscFunctionReturn(0); 315 ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 316 ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr); 317 if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} /* Zero all sf->ops */ 318 if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);} 319 #if defined(PETSC_HAVE_CUDA) 320 if (sf->backend == PETSCSF_BACKEND_CUDA) { 321 sf->ops->Malloc = PetscSFMalloc_CUDA; 322 sf->ops->Free = PetscSFFree_CUDA; 323 } 324 #endif 325 #if defined(PETSC_HAVE_HIP) 326 if (sf->backend == PETSCSF_BACKEND_HIP) { 327 sf->ops->Malloc = PetscSFMalloc_HIP; 328 sf->ops->Free = PetscSFFree_HIP; 329 } 330 #endif 331 332 # 333 #if defined(PETSC_HAVE_KOKKOS) 334 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 335 sf->ops->Malloc = PetscSFMalloc_Kokkos; 336 sf->ops->Free = PetscSFFree_Kokkos; 337 } 338 #endif 339 ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 340 sf->setupcalled = PETSC_TRUE; 341 PetscFunctionReturn(0); 342 } 343 344 /*@ 345 PetscSFSetFromOptions - set PetscSF options using the options database 346 347 Logically Collective 348 349 Input Arguments: 350 . sf - star forest 351 352 Options Database Keys: 353 + -sf_type - implementation type, see PetscSFSetType() 354 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 355 . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also 356 use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true). 357 If true, this option only works with -use_gpu_aware_mpi 1. 358 . -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false). 359 If true, this option only works with -use_gpu_aware_mpi 1. 360 361 - -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos. 362 On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices, 363 the only available is kokkos. 
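
   For example, an illustrative launch of a hypothetical application ./app on a CUDA-enabled build, selecting the CUDA
   backend and assuming a GPU-aware, stream-aware MPI, could look like
.vb
   mpiexec -n 4 ./app -use_gpu_aware_mpi 1 -sf_backend cuda -sf_use_stream_aware_mpi
.ve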

  Level: intermediate
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  {
    char      backendstr[32] = {0};
    PetscBool isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
    /* Change the defaults set in PetscSFCreate() with command line options */
    ierr = PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitrary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL);CHKERRQ(ierr);
    ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr);
    ierr = PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);CHKERRQ(ierr);
    ierr = PetscStrcasecmp("cuda",backendstr,&isCuda);CHKERRQ(ierr);
    ierr = PetscStrcasecmp("kokkos",backendstr,&isKokkos);CHKERRQ(ierr);
    ierr = PetscStrcasecmp("hip",backendstr,&isHip);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
    if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
    else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
    else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
    else if (set) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
#elif defined(PETSC_HAVE_KOKKOS)
    if (set && !isKokkos) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported.
You can only choose kokkos", backendstr); 399 #endif 400 } 401 #endif 402 if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);} 403 ierr = PetscOptionsEnd();CHKERRQ(ierr); 404 PetscFunctionReturn(0); 405 } 406 407 /*@ 408 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 409 410 Logically Collective 411 412 Input Arguments: 413 + sf - star forest 414 - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic) 415 416 Level: advanced 417 418 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin() 419 @*/ 420 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg) 421 { 422 PetscFunctionBegin; 423 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 424 PetscValidLogicalCollectiveBool(sf,flg,2); 425 if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 426 sf->rankorder = flg; 427 PetscFunctionReturn(0); 428 } 429 430 /*@ 431 PetscSFSetGraph - Set a parallel star forest 432 433 Collective 434 435 Input Arguments: 436 + sf - star forest 437 . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 438 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 439 . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced 440 during setup in debug mode) 441 . localmode - copy mode for ilocal 442 . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 443 during setup in debug mode) 444 - remotemode - copy mode for iremote 445 446 Level: intermediate 447 448 Notes: 449 In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode 450 451 Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they 452 encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not 453 needed 454 455 Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf 456 indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode). 
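
  Example:
  An illustrative sketch (error checking elided): every rank attaches two contiguously stored leaves (ilocal = NULL) to
  the two roots owned by rank 0, so only rank 0 declares roots:
.vb
  PetscSFNode iremote[2];
  PetscMPIInt rank;

  MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
  iremote[0].rank  = 0;
  iremote[0].index = 0;
  iremote[1].rank  = 0;
  iremote[1].index = 1;
  PetscSFSetGraph(sf,rank ? 0 : 2,2,NULL,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);
.ve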
457 458 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph() 459 @*/ 460 PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode) 461 { 462 PetscErrorCode ierr; 463 464 PetscFunctionBegin; 465 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 466 if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4); 467 if (nleaves > 0) PetscValidPointer(iremote,6); 468 if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots); 469 if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves); 470 471 if (sf->nroots >= 0) { /* Reset only if graph already set */ 472 ierr = PetscSFReset(sf);CHKERRQ(ierr); 473 } 474 475 ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 476 477 sf->nroots = nroots; 478 sf->nleaves = nleaves; 479 480 if (nleaves && ilocal) { 481 PetscInt i; 482 PetscInt minleaf = PETSC_MAX_INT; 483 PetscInt maxleaf = PETSC_MIN_INT; 484 int contiguous = 1; 485 for (i=0; i<nleaves; i++) { 486 minleaf = PetscMin(minleaf,ilocal[i]); 487 maxleaf = PetscMax(maxleaf,ilocal[i]); 488 contiguous &= (ilocal[i] == i); 489 } 490 sf->minleaf = minleaf; 491 sf->maxleaf = maxleaf; 492 if (contiguous) { 493 if (localmode == PETSC_OWN_POINTER) { 494 ierr = PetscFree(ilocal);CHKERRQ(ierr); 495 } 496 ilocal = NULL; 497 } 498 } else { 499 sf->minleaf = 0; 500 sf->maxleaf = nleaves - 1; 501 } 502 503 if (ilocal) { 504 switch (localmode) { 505 case PETSC_COPY_VALUES: 506 ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr); 507 ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr); 508 sf->mine = sf->mine_alloc; 509 break; 510 case PETSC_OWN_POINTER: 511 sf->mine_alloc = (PetscInt*)ilocal; 512 sf->mine = sf->mine_alloc; 513 break; 514 case PETSC_USE_POINTER: 515 sf->mine_alloc = NULL; 516 sf->mine = (PetscInt*)ilocal; 517 break; 518 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode"); 519 } 520 } 521 522 switch (remotemode) { 523 case PETSC_COPY_VALUES: 524 ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr); 525 ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr); 526 sf->remote = sf->remote_alloc; 527 break; 528 case PETSC_OWN_POINTER: 529 sf->remote_alloc = (PetscSFNode*)iremote; 530 sf->remote = sf->remote_alloc; 531 break; 532 case PETSC_USE_POINTER: 533 sf->remote_alloc = NULL; 534 sf->remote = (PetscSFNode*)iremote; 535 break; 536 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode"); 537 } 538 539 ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 540 sf->graphset = PETSC_TRUE; 541 PetscFunctionReturn(0); 542 } 543 544 /*@ 545 PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern 546 547 Collective 548 549 Input Parameters: 550 + sf - The PetscSF 551 . map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL) 552 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL 553 554 Notes: 555 It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map. 556 n and N are local and global sizes of x respectively. 557 558 With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to 559 sequential vectors y on all ranks. 
560 561 With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a 562 sequential vector y on rank 0. 563 564 In above cases, entries of x are roots and entries of y are leaves. 565 566 With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine 567 creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 568 of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does 569 not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data 570 items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines. 571 572 In this case, roots and leaves are symmetric. 573 574 Level: intermediate 575 @*/ 576 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern) 577 { 578 MPI_Comm comm; 579 PetscInt n,N,res[2]; 580 PetscMPIInt rank,size; 581 PetscSFType type; 582 PetscErrorCode ierr; 583 584 PetscFunctionBegin; 585 ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr); 586 if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern); 587 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 588 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 589 590 if (pattern == PETSCSF_PATTERN_ALLTOALL) { 591 type = PETSCSFALLTOALL; 592 ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr); 593 ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr); 594 ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr); 595 ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr); 596 } else { 597 ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr); 598 ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr); 599 res[0] = n; 600 res[1] = -n; 601 /* Check if n are same over all ranks so that we can optimize it */ 602 ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRMPI(ierr); 603 if (res[0] == -res[1]) { /* same n */ 604 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 605 } else { 606 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 607 } 608 ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr); 609 } 610 ierr = PetscSFSetType(sf,type);CHKERRQ(ierr); 611 612 sf->pattern = pattern; 613 sf->mine = NULL; /* Contiguous */ 614 615 /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 616 Also set other easy stuff. 617 */ 618 if (pattern == PETSCSF_PATTERN_ALLGATHER) { 619 sf->nleaves = N; 620 sf->nroots = n; 621 sf->nranks = size; 622 sf->minleaf = 0; 623 sf->maxleaf = N - 1; 624 } else if (pattern == PETSCSF_PATTERN_GATHER) { 625 sf->nleaves = rank ? 0 : N; 626 sf->nroots = n; 627 sf->nranks = rank ? 0 : size; 628 sf->minleaf = 0; 629 sf->maxleaf = rank ? 
-1 : N - 1; 630 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 631 sf->nleaves = size; 632 sf->nroots = size; 633 sf->nranks = size; 634 sf->minleaf = 0; 635 sf->maxleaf = size - 1; 636 } 637 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 638 sf->graphset = PETSC_TRUE; 639 PetscFunctionReturn(0); 640 } 641 642 /*@ 643 PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map 644 645 Collective 646 647 Input Arguments: 648 649 . sf - star forest to invert 650 651 Output Arguments: 652 . isf - inverse of sf 653 Level: advanced 654 655 Notes: 656 All roots must have degree 1. 657 658 The local space may be a permutation, but cannot be sparse. 659 660 .seealso: PetscSFSetGraph() 661 @*/ 662 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf) 663 { 664 PetscErrorCode ierr; 665 PetscMPIInt rank; 666 PetscInt i,nroots,nleaves,maxlocal,count,*newilocal; 667 const PetscInt *ilocal; 668 PetscSFNode *roots,*leaves; 669 670 PetscFunctionBegin; 671 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 672 PetscSFCheckGraphSet(sf,1); 673 PetscValidPointer(isf,2); 674 675 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 676 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 677 678 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 679 ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr); 680 for (i=0; i<maxlocal; i++) { 681 leaves[i].rank = rank; 682 leaves[i].index = i; 683 } 684 for (i=0; i <nroots; i++) { 685 roots[i].rank = -1; 686 roots[i].index = -1; 687 } 688 ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr); 689 ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr); 690 691 /* Check whether our leaves are sparse */ 692 for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++; 693 if (count == nroots) newilocal = NULL; 694 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ 695 ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr); 696 for (i=0,count=0; i<nroots; i++) { 697 if (roots[i].rank >= 0) { 698 newilocal[count] = i; 699 roots[count].rank = roots[i].rank; 700 roots[count].index = roots[i].index; 701 count++; 702 } 703 } 704 } 705 706 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr); 707 ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr); 708 ierr = PetscFree2(roots,leaves);CHKERRQ(ierr); 709 PetscFunctionReturn(0); 710 } 711 712 /*@ 713 PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph 714 715 Collective 716 717 Input Arguments: 718 + sf - communication object to duplicate 719 - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption) 720 721 Output Arguments: 722 . 
newsf - new communication object 723 724 Level: beginner 725 726 .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph() 727 @*/ 728 PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf) 729 { 730 PetscSFType type; 731 PetscErrorCode ierr; 732 MPI_Datatype dtype=MPIU_SCALAR; 733 734 PetscFunctionBegin; 735 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 736 PetscValidLogicalCollectiveEnum(sf,opt,2); 737 PetscValidPointer(newsf,3); 738 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr); 739 ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 740 if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);} 741 if (opt == PETSCSF_DUPLICATE_GRAPH) { 742 PetscSFCheckGraphSet(sf,1); 743 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 744 PetscInt nroots,nleaves; 745 const PetscInt *ilocal; 746 const PetscSFNode *iremote; 747 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 748 ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr); 749 } else { 750 ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr); 751 } 752 } 753 /* Since oldtype is committed, so is newtype, according to MPI */ 754 if (sf->vscat.bs > 1) {ierr = MPI_Type_dup(sf->vscat.unit,&dtype);CHKERRMPI(ierr);} 755 (*newsf)->vscat.bs = sf->vscat.bs; 756 (*newsf)->vscat.unit = dtype; 757 (*newsf)->vscat.to_n = sf->vscat.to_n; 758 (*newsf)->vscat.from_n = sf->vscat.from_n; 759 /* Do not copy lsf. Build it on demand since it is rarely used */ 760 761 #if defined(PETSC_HAVE_DEVICE) 762 (*newsf)->backend = sf->backend; 763 (*newsf)->unknown_input_stream= sf->unknown_input_stream; 764 (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi; 765 (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi; 766 #endif 767 if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);} 768 /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */ 769 PetscFunctionReturn(0); 770 } 771 772 /*@C 773 PetscSFGetGraph - Get the graph specifying a parallel star forest 774 775 Not Collective 776 777 Input Arguments: 778 . sf - star forest 779 780 Output Arguments: 781 + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 782 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 783 . ilocal - locations of leaves in leafdata buffers 784 - iremote - remote locations of root vertices for each leaf on the current process 785 786 Notes: 787 We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet 788 789 When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you 790 want to update the graph, you must call PetscSFSetGraph after modifying the iremote array. 
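
  Example:
  A small sketch (error checking elided) of inspecting a graph that has already been set; a NULL ilocal means the leaves
  are stored contiguously, i.e. leaf i lives at location i of the leafdata buffer:
.vb
  PetscInt          nroots,nleaves,i,loc;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;

  PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
  for (i=0; i<nleaves; i++) {
    loc = ilocal ? ilocal[i] : i;
    PetscPrintf(PETSC_COMM_SELF,"leaf at %D -> (rank %D, root %D)\n",loc,iremote[i].rank,iremote[i].index);
  }
.ve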
791 792 Level: intermediate 793 794 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph() 795 @*/ 796 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 797 { 798 PetscErrorCode ierr; 799 800 PetscFunctionBegin; 801 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 802 if (sf->ops->GetGraph) { 803 ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr); 804 } else { 805 if (nroots) *nroots = sf->nroots; 806 if (nleaves) *nleaves = sf->nleaves; 807 if (ilocal) *ilocal = sf->mine; 808 if (iremote) *iremote = sf->remote; 809 } 810 PetscFunctionReturn(0); 811 } 812 813 /*@ 814 PetscSFGetLeafRange - Get the active leaf ranges 815 816 Not Collective 817 818 Input Arguments: 819 . sf - star forest 820 821 Output Arguments: 822 + minleaf - minimum active leaf on this process. Return 0 if there are no leaves. 823 - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves. 824 825 Level: developer 826 827 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph() 828 @*/ 829 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf) 830 { 831 PetscFunctionBegin; 832 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 833 PetscSFCheckGraphSet(sf,1); 834 if (minleaf) *minleaf = sf->minleaf; 835 if (maxleaf) *maxleaf = sf->maxleaf; 836 PetscFunctionReturn(0); 837 } 838 839 /*@C 840 PetscSFViewFromOptions - View from Options 841 842 Collective on PetscSF 843 844 Input Parameters: 845 + A - the star forest 846 . obj - Optional object 847 - name - command line option 848 849 Level: intermediate 850 .seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate() 851 @*/ 852 PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[]) 853 { 854 PetscErrorCode ierr; 855 856 PetscFunctionBegin; 857 PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1); 858 ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr); 859 PetscFunctionReturn(0); 860 } 861 862 /*@C 863 PetscSFView - view a star forest 864 865 Collective 866 867 Input Arguments: 868 + sf - star forest 869 - viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD 870 871 Level: beginner 872 873 .seealso: PetscSFCreate(), PetscSFSetGraph() 874 @*/ 875 PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer) 876 { 877 PetscErrorCode ierr; 878 PetscBool iascii; 879 PetscViewerFormat format; 880 881 PetscFunctionBegin; 882 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 883 if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);} 884 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 885 PetscCheckSameComm(sf,1,viewer,2); 886 if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);} 887 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 888 if (iascii) { 889 PetscMPIInt rank; 890 PetscInt ii,i,j; 891 892 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr); 893 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 894 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 895 if (!sf->graphset) { 896 ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr); 897 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 898 PetscFunctionReturn(0); 899 } 900 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 901 ierr = 
PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 902 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr); 903 for (i=0; i<sf->nleaves; i++) { 904 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr); 905 } 906 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 907 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 908 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 909 PetscMPIInt *tmpranks,*perm; 910 ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr); 911 ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr); 912 for (i=0; i<sf->nranks; i++) perm[i] = i; 913 ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr); 914 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr); 915 for (ii=0; ii<sf->nranks; ii++) { 916 i = perm[ii]; 917 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr); 918 for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) { 919 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr); 920 } 921 } 922 ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr); 923 } 924 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 925 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 926 } 927 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 928 } 929 if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);} 930 PetscFunctionReturn(0); 931 } 932 933 /*@C 934 PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process 935 936 Not Collective 937 938 Input Arguments: 939 . sf - star forest 940 941 Output Arguments: 942 + nranks - number of ranks referenced by local part 943 . ranks - array of ranks 944 . roffset - offset in rmine/rremote for each rank (length nranks+1) 945 . rmine - concatenated array holding local indices referencing each remote rank 946 - rremote - concatenated array holding remote indices referenced for each remote rank 947 948 Level: developer 949 950 .seealso: PetscSFGetLeafRanks() 951 @*/ 952 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote) 953 { 954 PetscErrorCode ierr; 955 956 PetscFunctionBegin; 957 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 958 if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 959 if (sf->ops->GetRootRanks) { 960 ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr); 961 } else { 962 /* The generic implementation */ 963 if (nranks) *nranks = sf->nranks; 964 if (ranks) *ranks = sf->ranks; 965 if (roffset) *roffset = sf->roffset; 966 if (rmine) *rmine = sf->rmine; 967 if (rremote) *rremote = sf->rremote; 968 } 969 PetscFunctionReturn(0); 970 } 971 972 /*@C 973 PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 974 975 Not Collective 976 977 Input Arguments: 978 . sf - star forest 979 980 Output Arguments: 981 + niranks - number of leaf ranks referencing roots on this process 982 . iranks - array of ranks 983 . 
ioffset - offset in irootloc for each rank (length niranks+1) 984 - irootloc - concatenated array holding local indices of roots referenced by each leaf rank 985 986 Level: developer 987 988 .seealso: PetscSFGetRootRanks() 989 @*/ 990 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 991 { 992 PetscErrorCode ierr; 993 994 PetscFunctionBegin; 995 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 996 if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 997 if (sf->ops->GetLeafRanks) { 998 ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr); 999 } else { 1000 PetscSFType type; 1001 ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 1002 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 1003 } 1004 PetscFunctionReturn(0); 1005 } 1006 1007 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) { 1008 PetscInt i; 1009 for (i=0; i<n; i++) { 1010 if (needle == list[i]) return PETSC_TRUE; 1011 } 1012 return PETSC_FALSE; 1013 } 1014 1015 /*@C 1016 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations. 1017 1018 Collective 1019 1020 Input Arguments: 1021 + sf - PetscSF to set up; PetscSFSetGraph() must have been called 1022 - dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange) 1023 1024 Level: developer 1025 1026 .seealso: PetscSFGetRootRanks() 1027 @*/ 1028 PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup) 1029 { 1030 PetscErrorCode ierr; 1031 PetscTable table; 1032 PetscTablePosition pos; 1033 PetscMPIInt size,groupsize,*groupranks; 1034 PetscInt *rcount,*ranks; 1035 PetscInt i, irank = -1,orank = -1; 1036 1037 PetscFunctionBegin; 1038 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1039 PetscSFCheckGraphSet(sf,1); 1040 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr); 1041 ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr); 1042 for (i=0; i<sf->nleaves; i++) { 1043 /* Log 1-based rank */ 1044 ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr); 1045 } 1046 ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr); 1047 ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr); 1048 ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr); 1049 ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr); 1050 for (i=0; i<sf->nranks; i++) { 1051 ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr); 1052 ranks[i]--; /* Convert back to 0-based */ 1053 } 1054 ierr = PetscTableDestroy(&table);CHKERRQ(ierr); 1055 1056 /* We expect that dgroup is reliably "small" while nranks could be large */ 1057 { 1058 MPI_Group group = MPI_GROUP_NULL; 1059 PetscMPIInt *dgroupranks; 1060 ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr); 1061 ierr = MPI_Group_size(dgroup,&groupsize);CHKERRMPI(ierr); 1062 ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr); 1063 ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr); 1064 for (i=0; i<groupsize; i++) dgroupranks[i] = i; 1065 if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRMPI(ierr);} 1066 ierr = MPI_Group_free(&group);CHKERRMPI(ierr); 
1067 ierr = PetscFree(dgroupranks);CHKERRQ(ierr); 1068 } 1069 1070 /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */ 1071 for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) { 1072 for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */ 1073 if (InList(ranks[i],groupsize,groupranks)) break; 1074 } 1075 for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */ 1076 if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break; 1077 } 1078 if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */ 1079 PetscInt tmprank,tmpcount; 1080 1081 tmprank = ranks[i]; 1082 tmpcount = rcount[i]; 1083 ranks[i] = ranks[sf->ndranks]; 1084 rcount[i] = rcount[sf->ndranks]; 1085 ranks[sf->ndranks] = tmprank; 1086 rcount[sf->ndranks] = tmpcount; 1087 sf->ndranks++; 1088 } 1089 } 1090 ierr = PetscFree(groupranks);CHKERRQ(ierr); 1091 ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr); 1092 ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr); 1093 sf->roffset[0] = 0; 1094 for (i=0; i<sf->nranks; i++) { 1095 ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr); 1096 sf->roffset[i+1] = sf->roffset[i] + rcount[i]; 1097 rcount[i] = 0; 1098 } 1099 for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) { 1100 /* short circuit */ 1101 if (orank != sf->remote[i].rank) { 1102 /* Search for index of iremote[i].rank in sf->ranks */ 1103 ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr); 1104 if (irank < 0) { 1105 ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr); 1106 if (irank >= 0) irank += sf->ndranks; 1107 } 1108 orank = sf->remote[i].rank; 1109 } 1110 if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank); 1111 sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i; 1112 sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index; 1113 rcount[irank]++; 1114 } 1115 ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr); 1116 PetscFunctionReturn(0); 1117 } 1118 1119 /*@C 1120 PetscSFGetGroups - gets incoming and outgoing process groups 1121 1122 Collective 1123 1124 Input Argument: 1125 . 
sf - star forest 1126 1127 Output Arguments: 1128 + incoming - group of origin processes for incoming edges (leaves that reference my roots) 1129 - outgoing - group of destination processes for outgoing edges (roots that I reference) 1130 1131 Level: developer 1132 1133 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow() 1134 @*/ 1135 PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing) 1136 { 1137 PetscErrorCode ierr; 1138 MPI_Group group = MPI_GROUP_NULL; 1139 1140 PetscFunctionBegin; 1141 if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups"); 1142 if (sf->ingroup == MPI_GROUP_NULL) { 1143 PetscInt i; 1144 const PetscInt *indegree; 1145 PetscMPIInt rank,*outranks,*inranks; 1146 PetscSFNode *remote; 1147 PetscSF bgcount; 1148 1149 /* Compute the number of incoming ranks */ 1150 ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr); 1151 for (i=0; i<sf->nranks; i++) { 1152 remote[i].rank = sf->ranks[i]; 1153 remote[i].index = 0; 1154 } 1155 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr); 1156 ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1157 ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr); 1158 ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr); 1159 /* Enumerate the incoming ranks */ 1160 ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr); 1161 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 1162 for (i=0; i<sf->nranks; i++) outranks[i] = rank; 1163 ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr); 1164 ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr); 1165 ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr); 1166 ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRMPI(ierr); 1167 ierr = MPI_Group_free(&group);CHKERRMPI(ierr); 1168 ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr); 1169 ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr); 1170 } 1171 *incoming = sf->ingroup; 1172 1173 if (sf->outgroup == MPI_GROUP_NULL) { 1174 ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr); 1175 ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRMPI(ierr); 1176 ierr = MPI_Group_free(&group);CHKERRMPI(ierr); 1177 } 1178 *outgoing = sf->outgroup; 1179 PetscFunctionReturn(0); 1180 } 1181 1182 /*@ 1183 PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters 1184 1185 Collective 1186 1187 Input Argument: 1188 . sf - star forest that may contain roots with 0 or with more than 1 vertex 1189 1190 Output Arguments: 1191 . multi - star forest with split roots, such that each root has degree exactly 1 1192 1193 Level: developer 1194 1195 Notes: 1196 1197 In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi 1198 directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming 1199 edge, it is a candidate for future optimization that might involve its removal. 
1200 1201 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering() 1202 @*/ 1203 PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi) 1204 { 1205 PetscErrorCode ierr; 1206 1207 PetscFunctionBegin; 1208 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1209 PetscValidPointer(multi,2); 1210 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */ 1211 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1212 *multi = sf->multi; 1213 sf->multi->multi = sf->multi; 1214 PetscFunctionReturn(0); 1215 } 1216 if (!sf->multi) { 1217 const PetscInt *indegree; 1218 PetscInt i,*inoffset,*outones,*outoffset,maxlocal; 1219 PetscSFNode *remote; 1220 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 1221 ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr); 1222 ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr); 1223 ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr); 1224 inoffset[0] = 0; 1225 for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i]; 1226 for (i=0; i<maxlocal; i++) outones[i] = 1; 1227 ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1228 ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1229 for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1230 if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */ 1231 for (i=0; i<sf->nroots; i++) { 1232 if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp"); 1233 } 1234 } 1235 ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr); 1236 for (i=0; i<sf->nleaves; i++) { 1237 remote[i].rank = sf->remote[i].rank; 1238 remote[i].index = outoffset[sf->mine ? 
sf->mine[i] : i]; 1239 } 1240 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1241 sf->multi->multi = sf->multi; 1242 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1243 if (sf->rankorder) { /* Sort the ranks */ 1244 PetscMPIInt rank; 1245 PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree; 1246 PetscSFNode *newremote; 1247 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 1248 for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]); 1249 ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr); 1250 for (i=0; i<maxlocal; i++) outranks[i] = rank; 1251 ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr); 1252 ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr); 1253 /* Sort the incoming ranks at each vertex, build the inverse map */ 1254 for (i=0; i<sf->nroots; i++) { 1255 PetscInt j; 1256 for (j=0; j<indegree[i]; j++) tmpoffset[j] = j; 1257 ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr); 1258 for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1259 } 1260 ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr); 1261 ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr); 1262 ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr); 1263 for (i=0; i<sf->nleaves; i++) { 1264 newremote[i].rank = sf->remote[i].rank; 1265 newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i]; 1266 } 1267 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1268 ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr); 1269 } 1270 ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr); 1271 } 1272 *multi = sf->multi; 1273 PetscFunctionReturn(0); 1274 } 1275 1276 /*@C 1277 PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices 1278 1279 Collective 1280 1281 Input Arguments: 1282 + sf - original star forest 1283 . nselected - number of selected roots on this process 1284 - selected - indices of the selected roots on this process 1285 1286 Output Arguments: 1287 . esf - new star forest 1288 1289 Level: advanced 1290 1291 Note: 1292 To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can 1293 be done by calling PetscSFGetGraph(). 
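
  Example:
  An illustrative sketch (error checking elided, and assuming sf has at least six roots) that keeps only the edges whose
  roots are listed in sel[]:
.vb
  PetscInt sel[] = {0,2,5};
  PetscSF  esf;

  PetscSFCreateEmbeddedRootSF(sf,3,sel,&esf);
  PetscSFViewFromOptions(esf,NULL,"-esf_view");
  PetscSFDestroy(&esf);
.ve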
1294 1295 .seealso: PetscSFSetGraph(), PetscSFGetGraph() 1296 @*/ 1297 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf) 1298 { 1299 PetscInt i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal; 1300 const PetscInt *ilocal; 1301 signed char *rootdata,*leafdata,*leafmem; 1302 const PetscSFNode *iremote; 1303 PetscSFNode *new_iremote; 1304 MPI_Comm comm; 1305 PetscErrorCode ierr; 1306 1307 PetscFunctionBegin; 1308 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1309 PetscSFCheckGraphSet(sf,1); 1310 if (nselected) PetscValidPointer(selected,3); 1311 PetscValidPointer(esf,4); 1312 1313 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1314 ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1315 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1316 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 1317 1318 if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */ 1319 PetscBool dups; 1320 ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr); 1321 if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups"); 1322 for (i=0; i<nselected; i++) 1323 if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %D is out of [0,%D)",selected[i],nroots); 1324 } 1325 1326 if (sf->ops->CreateEmbeddedRootSF) { 1327 ierr = (*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf);CHKERRQ(ierr); 1328 } else { 1329 /* A generic version of creating embedded sf */ 1330 ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 1331 maxlocal = maxleaf - minleaf + 1; 1332 ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr); 1333 leafdata = leafmem - minleaf; 1334 /* Tag selected roots and bcast to leaves */ 1335 for (i=0; i<nselected; i++) rootdata[selected[i]] = 1; 1336 ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 1337 ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 1338 1339 /* Build esf with leaves that are still connected */ 1340 esf_nleaves = 0; 1341 for (i=0; i<nleaves; i++) { 1342 j = ilocal ? ilocal[i] : i; 1343 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1344 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1345 */ 1346 esf_nleaves += (leafdata[j] ? 1 : 0); 1347 } 1348 ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr); 1349 ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr); 1350 for (i=n=0; i<nleaves; i++) { 1351 j = ilocal ? ilocal[i] : i; 1352 if (leafdata[j]) { 1353 new_ilocal[n] = j; 1354 new_iremote[n].rank = iremote[i].rank; 1355 new_iremote[n].index = iremote[i].index; 1356 ++n; 1357 } 1358 } 1359 ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr); 1360 ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr); 1361 ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1362 ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr); 1363 } 1364 ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1365 PetscFunctionReturn(0); 1366 } 1367 1368 /*@C 1369 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1370 1371 Collective 1372 1373 Input Arguments: 1374 + sf - original star forest 1375 . 
nselected - number of selected leaves on this process 1376 - selected - indices of the selected leaves on this process 1377 1378 Output Arguments: 1379 . newsf - new star forest 1380 1381 Level: advanced 1382 1383 .seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph() 1384 @*/ 1385 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1386 { 1387 const PetscSFNode *iremote; 1388 PetscSFNode *new_iremote; 1389 const PetscInt *ilocal; 1390 PetscInt i,nroots,*leaves,*new_ilocal; 1391 MPI_Comm comm; 1392 PetscErrorCode ierr; 1393 1394 PetscFunctionBegin; 1395 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1396 PetscSFCheckGraphSet(sf,1); 1397 if (nselected) PetscValidPointer(selected,3); 1398 PetscValidPointer(newsf,4); 1399 1400 /* Uniq selected[] and put results in leaves[] */ 1401 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1402 ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr); 1403 ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr); 1404 ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr); 1405 if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves); 1406 1407 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1408 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) { 1409 ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr); 1410 } else { 1411 ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr); 1412 ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr); 1413 ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr); 1414 for (i=0; i<nselected; ++i) { 1415 const PetscInt l = leaves[i]; 1416 new_ilocal[i] = ilocal ? ilocal[l] : l; 1417 new_iremote[i].rank = iremote[l].rank; 1418 new_iremote[i].index = iremote[l].index; 1419 } 1420 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr); 1421 ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1422 } 1423 ierr = PetscFree(leaves);CHKERRQ(ierr); 1424 PetscFunctionReturn(0); 1425 } 1426 1427 /*@C 1428 PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd() 1429 1430 Collective on PetscSF 1431 1432 Input Arguments: 1433 + sf - star forest on which to communicate 1434 . unit - data type associated with each node 1435 . rootdata - buffer to broadcast 1436 - op - operation to use for reduction 1437 1438 Output Arguments: 1439 . leafdata - buffer to be reduced with values from each leaf's respective root 1440 1441 Level: intermediate 1442 1443 Notes: 1444 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1445 are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1446 use PetscSFBcastWithMemTypeBegin() instead. 
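
  Example:
  A minimal sketch (error checking elided) that overwrites every leaf value with the value of its root; rootdata and
  leafdata are assumed to be arrays sized for the root and leaf spaces of sf, and independent work may be overlapped
  between the Begin and End calls:
.vb
  PetscSFBcastBegin(sf,MPIU_SCALAR,rootdata,leafdata,MPI_REPLACE);
  PetscSFBcastEnd(sf,MPIU_SCALAR,rootdata,leafdata,MPI_REPLACE);
.ve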
1447 .seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin() 1448 @*/ 1449 PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1450 { 1451 PetscErrorCode ierr; 1452 PetscMemType rootmtype,leafmtype; 1453 1454 PetscFunctionBegin; 1455 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1456 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1457 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);} 1458 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1459 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1460 ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 1461 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);} 1462 PetscFunctionReturn(0); 1463 } 1464 1465 /*@C 1466 PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd() 1467 1468 Collective on PetscSF 1469 1470 Input Arguments: 1471 + sf - star forest on which to communicate 1472 . unit - data type associated with each node 1473 . rootmtype - memory type of rootdata 1474 . rootdata - buffer to broadcast 1475 . leafmtype - memory type of leafdata 1476 - op - operation to use for reduction 1477 1478 Output Arguments: 1479 . leafdata - buffer to be reduced with values from each leaf's respective root 1480 1481 Level: intermediate 1482 1483 .seealso: PetscSFBcastEnd(), PetscSFBcastBegin() 1484 @*/ 1485 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 1486 { 1487 PetscErrorCode ierr; 1488 1489 PetscFunctionBegin; 1490 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1491 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1492 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);} 1493 ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 1494 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);} 1495 PetscFunctionReturn(0); 1496 } 1497 1498 /*@C 1499 PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin() 1500 1501 Collective 1502 1503 Input Arguments: 1504 + sf - star forest 1505 . unit - data type 1506 . rootdata - buffer to broadcast 1507 - op - operation to use for reduction 1508 1509 Output Arguments: 1510 . leafdata - buffer to be reduced with values from each leaf's respective root 1511 1512 Level: intermediate 1513 1514 .seealso: PetscSFSetGraph(), PetscSFReduceEnd() 1515 @*/ 1516 PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1517 { 1518 PetscErrorCode ierr; 1519 1520 PetscFunctionBegin; 1521 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1522 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);} 1523 ierr = (*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr); 1524 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);} 1525 PetscFunctionReturn(0); 1526 } 1527 1528 /*@C 1529 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 1530 1531 Collective 1532 1533 Input Arguments: 1534 + sf - star forest 1535 . unit - data type 1536 . 
leafdata - values to reduce
1537 - op - reduction operation
1538
1539 Output Arguments:
1540 . rootdata - result of reduction of values from all leaves of each root
1541
1542 Level: intermediate
1543
1544 Notes:
1545 When PETSc is configured with device support, it uses its own mechanism to determine whether the given data pointers
1546 are host pointers or device pointers, which may incur a noticeable cost. If you already know the memory types, you should
1547 use PetscSFReduceWithMemTypeBegin() instead.
1548
1549 .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
1550 @*/
1551 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1552 {
1553 PetscErrorCode ierr;
1554 PetscMemType rootmtype,leafmtype;
1555
1556 PetscFunctionBegin;
1557 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1558 ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1559 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
1560 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1561 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1562 ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
1563 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
1564 PetscFunctionReturn(0);
1565 }
1566
1567 /*@C
1568 PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()
1569
1570 Collective
1571
1572 Input Arguments:
1573 + sf - star forest
1574 . unit - data type
1575 . leafmtype - memory type of leafdata
1576 . leafdata - values to reduce
1577 . rootmtype - memory type of rootdata
1578 - op - reduction operation
1579
1580 Output Arguments:
1581 . rootdata - result of reduction of values from all leaves of each root
1582
1583 Level: intermediate
1584
1585 .seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
1586 @*/
1587 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
1588 {
1589 PetscErrorCode ierr;
1590
1591 PetscFunctionBegin;
1592 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1593 ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1594 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
1595 ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
1596 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
1597 PetscFunctionReturn(0);
1598 }
1599
1600 /*@C
1601 PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1602
1603 Collective
1604
1605 Input Arguments:
1606 + sf - star forest
1607 . unit - data type
1608 . leafdata - values to reduce
1609 - op - reduction operation
1610
1611 Output Arguments:
1612 .
rootdata - result of reduction of values from all leaves of each root 1613 1614 Level: intermediate 1615 1616 .seealso: PetscSFSetGraph(), PetscSFBcastEnd() 1617 @*/ 1618 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1619 { 1620 PetscErrorCode ierr; 1621 1622 PetscFunctionBegin; 1623 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1624 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);} 1625 ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr); 1626 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);} 1627 PetscFunctionReturn(0); 1628 } 1629 1630 /*@C 1631 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd() 1632 1633 Collective 1634 1635 Input Arguments: 1636 + sf - star forest 1637 . unit - data type 1638 . leafdata - leaf values to use in reduction 1639 - op - operation to use for reduction 1640 1641 Output Arguments: 1642 + rootdata - root values to be updated, input state is seen by first process to perform an update 1643 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1644 1645 Level: advanced 1646 1647 Note: 1648 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1649 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1650 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1651 integers. 1652 1653 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph() 1654 @*/ 1655 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1656 { 1657 PetscErrorCode ierr; 1658 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1659 1660 PetscFunctionBegin; 1661 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1662 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1663 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1664 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1665 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1666 ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr); 1667 if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1668 ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr); 1669 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1670 PetscFunctionReturn(0); 1671 } 1672 1673 /*@C 1674 PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value 1675 1676 Collective 1677 1678 Input Arguments: 1679 + sf - star forest 1680 . unit - data type 1681 . 
leafdata - leaf values to use in reduction 1682 - op - operation to use for reduction 1683 1684 Output Arguments: 1685 + rootdata - root values to be updated, input state is seen by first process to perform an update 1686 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1687 1688 Level: advanced 1689 1690 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph() 1691 @*/ 1692 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1693 { 1694 PetscErrorCode ierr; 1695 1696 PetscFunctionBegin; 1697 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1698 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1699 ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr); 1700 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1701 PetscFunctionReturn(0); 1702 } 1703 1704 /*@C 1705 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd() 1706 1707 Collective 1708 1709 Input Arguments: 1710 . sf - star forest 1711 1712 Output Arguments: 1713 . degree - degree of each root vertex 1714 1715 Level: advanced 1716 1717 Notes: 1718 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 1719 1720 .seealso: PetscSFGatherBegin() 1721 @*/ 1722 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree) 1723 { 1724 PetscErrorCode ierr; 1725 1726 PetscFunctionBegin; 1727 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1728 PetscSFCheckGraphSet(sf,1); 1729 PetscValidPointer(degree,2); 1730 if (!sf->degreeknown) { 1731 PetscInt i, nroots = sf->nroots, maxlocal; 1732 if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1733 maxlocal = sf->maxleaf-sf->minleaf+1; 1734 ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr); 1735 ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1736 for (i=0; i<nroots; i++) sf->degree[i] = 0; 1737 for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1; 1738 ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr); 1739 } 1740 *degree = NULL; 1741 PetscFunctionReturn(0); 1742 } 1743 1744 /*@C 1745 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin() 1746 1747 Collective 1748 1749 Input Arguments: 1750 . sf - star forest 1751 1752 Output Arguments: 1753 . degree - degree of each root vertex 1754 1755 Level: developer 1756 1757 Notes: 1758 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 
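Example:
A sketch of the begin/end pairing (illustrative only; sf is assumed to have its graph set). The degrees are used
here to count the total number of multi-root entries, e.g. to size the multirootdata buffer of PetscSFGatherBegin():
.vb
  const PetscInt *degree;
  PetscInt       i,nroots,nmultiroots = 0;

  ierr = PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscSFComputeDegreeBegin(sf,&degree);CHKERRQ(ierr);
  ierr = PetscSFComputeDegreeEnd(sf,&degree);CHKERRQ(ierr);
  for (i=0; i<nroots; i++) nmultiroots += degree[i];
.ve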
1759 1760 .seealso: 1761 @*/ 1762 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree) 1763 { 1764 PetscErrorCode ierr; 1765 1766 PetscFunctionBegin; 1767 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1768 PetscSFCheckGraphSet(sf,1); 1769 PetscValidPointer(degree,2); 1770 if (!sf->degreeknown) { 1771 if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 1772 ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr); 1773 ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr); 1774 sf->degreeknown = PETSC_TRUE; 1775 } 1776 *degree = sf->degree; 1777 PetscFunctionReturn(0); 1778 } 1779 1780 1781 /*@C 1782 PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()). 1783 Each multi-root is assigned index of the corresponding original root. 1784 1785 Collective 1786 1787 Input Arguments: 1788 + sf - star forest 1789 - degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd() 1790 1791 Output Arguments: 1792 + nMultiRoots - (optional) number of multi-roots (roots of multi-SF) 1793 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots 1794 1795 Level: developer 1796 1797 Notes: 1798 The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed. 1799 1800 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF() 1801 @*/ 1802 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1803 { 1804 PetscSF msf; 1805 PetscInt i, j, k, nroots, nmroots; 1806 PetscErrorCode ierr; 1807 1808 PetscFunctionBegin; 1809 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1810 ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 1811 if (nroots) PetscValidIntPointer(degree,2); 1812 if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3); 1813 PetscValidPointer(multiRootsOrigNumbering,4); 1814 ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr); 1815 ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr); 1816 ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr); 1817 for (i=0,j=0,k=0; i<nroots; i++) { 1818 if (!degree[i]) continue; 1819 for (j=0; j<degree[i]; j++,k++) { 1820 (*multiRootsOrigNumbering)[k] = i; 1821 } 1822 } 1823 if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail"); 1824 if (nMultiRoots) *nMultiRoots = nmroots; 1825 PetscFunctionReturn(0); 1826 } 1827 1828 /*@C 1829 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 1830 1831 Collective 1832 1833 Input Arguments: 1834 + sf - star forest 1835 . unit - data type 1836 - leafdata - leaf data to gather to roots 1837 1838 Output Argument: 1839 . 
multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1840
1841 Level: intermediate
1842
1843 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1844 @*/
1845 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1846 {
1847 PetscErrorCode ierr;
1848 PetscSF multi = NULL;
1849
1850 PetscFunctionBegin;
1851 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1852 ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1853 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1854 ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr);
1855 PetscFunctionReturn(0);
1856 }
1857
1858 /*@C
1859 PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1860
1861 Collective
1862
1863 Input Arguments:
1864 + sf - star forest
1865 . unit - data type
1866 - leafdata - leaf data to gather to roots
1867
1868 Output Argument:
1869 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1870
1871 Level: intermediate
1872
1873 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1874 @*/
1875 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1876 {
1877 PetscErrorCode ierr;
1878 PetscSF multi = NULL;
1879
1880 PetscFunctionBegin;
1881 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1882 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1883 ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr);
1884 PetscFunctionReturn(0);
1885 }
1886
1887 /*@C
1888 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()
1889
1890 Collective
1891
1892 Input Arguments:
1893 + sf - star forest
1894 . unit - data type
1895 - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1896
1897 Output Argument:
1898 . leafdata - leaf data to be updated with personal data from each respective root
1899
1900 Level: intermediate
1901
1902 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterEnd()
1903 @*/
1904 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1905 {
1906 PetscErrorCode ierr;
1907 PetscSF multi = NULL;
1908
1909 PetscFunctionBegin;
1910 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1911 ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1912 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1913 ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
1914 PetscFunctionReturn(0);
1915 }
1916
1917 /*@C
1918 PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()
1919
1920 Collective
1921
1922 Input Arguments:
1923 + sf - star forest
1924 . unit - data type
1925 - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1926
1927 Output Argument:
1928 . leafdata - leaf data to be updated with personal data from each respective root
1929
1930 Level: intermediate
1931
1932 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterBegin()
1933 @*/
1934 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1935 {
1936 PetscErrorCode ierr;
1937 PetscSF multi = NULL;
1938
1939 PetscFunctionBegin;
1940 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1941 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1942 ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
1943 PetscFunctionReturn(0);
1944 }
1945
1946 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1947 {
1948 PetscInt i, n, nleaves;
1949 const PetscInt *ilocal = NULL;
1950 PetscHSetI seen;
1951 PetscErrorCode ierr;
1952
1953 PetscFunctionBegin;
1954 if (PetscDefined(USE_DEBUG)) {
1955 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
1956 ierr = PetscHSetICreate(&seen);CHKERRQ(ierr);
1957 for (i = 0; i < nleaves; i++) {
1958 const PetscInt leaf = ilocal ? ilocal[i] : i;
1959 ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
1960 }
1961 ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
1962 if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1963 ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
1964 }
1965 PetscFunctionReturn(0);
1966 }
1967
1968 /*@
1969 PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view
1970
1971 Input Parameters:
1972 + sfA - The first PetscSF
1973 - sfB - The second PetscSF
1974
1975 Output Parameters:
1976 . sfBA - The composite SF
1977
1978 Level: developer
1979
1980 Notes:
1981 Currently, the two SFs must be defined on congruent communicators and they must be true star
1982 forests, i.e. the same leaf is not connected to different roots.
1983
1984 sfA's leaf space and sfB's root space may partially overlap. The composition builds
1985 a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
1986 nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
1987 Bcast on sfA, then a Bcast on sfB, on connected nodes.
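Example:
A sketch of composing two SFs (illustrative only; sfA and sfB are assumed to satisfy the conditions above, with
rootdataA sized for sfA's root space and leafdataB for sfB's leaf space). A single Bcast on the composite SF then
moves data from sfA's roots directly to sfB's leaves:
.vb
  ierr = PetscSFCompose(sfA,sfB,&sfBA);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sfBA,MPIU_SCALAR,rootdataA,leafdataB,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfBA,MPIU_SCALAR,rootdataA,leafdataB,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sfBA);CHKERRQ(ierr);
.ve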
1988 1989 .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph() 1990 @*/ 1991 PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA) 1992 { 1993 PetscErrorCode ierr; 1994 const PetscSFNode *remotePointsA,*remotePointsB; 1995 PetscSFNode *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB; 1996 const PetscInt *localPointsA,*localPointsB; 1997 PetscInt *localPointsBA; 1998 PetscInt i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA; 1999 PetscBool denseB; 2000 2001 PetscFunctionBegin; 2002 PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1); 2003 PetscSFCheckGraphSet(sfA,1); 2004 PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2); 2005 PetscSFCheckGraphSet(sfB,2); 2006 PetscCheckSameComm(sfA,1,sfB,2); 2007 PetscValidPointer(sfBA,3); 2008 ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr); 2009 ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr); 2010 2011 ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr); 2012 ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr); 2013 if (localPointsA) { 2014 ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr); 2015 for (i=0; i<numRootsB; i++) { 2016 reorderedRemotePointsA[i].rank = -1; 2017 reorderedRemotePointsA[i].index = -1; 2018 } 2019 for (i=0; i<numLeavesA; i++) { 2020 if (localPointsA[i] >= numRootsB) continue; 2021 reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i]; 2022 } 2023 remotePointsA = reorderedRemotePointsA; 2024 } 2025 ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr); 2026 ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr); 2027 ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr); 2028 ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr); 2029 ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr); 2030 2031 denseB = (PetscBool)!localPointsB; 2032 for (i=0,numLeavesBA=0; i<numLeavesB; i++) { 2033 if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE; 2034 else numLeavesBA++; 2035 } 2036 if (denseB) { 2037 localPointsBA = NULL; 2038 remotePointsBA = leafdataB; 2039 } else { 2040 ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr); 2041 ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr); 2042 for (i=0,numLeavesBA=0; i<numLeavesB; i++) { 2043 const PetscInt l = localPointsB ? localPointsB[i] : i; 2044 2045 if (leafdataB[l-minleaf].rank == -1) continue; 2046 remotePointsBA[numLeavesBA] = leafdataB[l-minleaf]; 2047 localPointsBA[numLeavesBA] = l; 2048 numLeavesBA++; 2049 } 2050 ierr = PetscFree(leafdataB);CHKERRQ(ierr); 2051 } 2052 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr); 2053 ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr); 2054 ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr); 2055 PetscFunctionReturn(0); 2056 } 2057 2058 /*@ 2059 PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one 2060 2061 Input Parameters: 2062 + sfA - The first PetscSF 2063 - sfB - The second PetscSF 2064 2065 Output Parameters: 2066 . sfBA - The composite SF. 2067 2068 Level: developer 2069 2070 Notes: 2071 Currently, the two SFs must be defined on congruent communicators and they must be true star 2072 forests, i.e. 
the same leaf is not connected to different roots. Moreover, all roots of the
2073 second SF must have a degree of 1, i.e., no root has more than one leaf connected.
2074
2075 sfA's leaf space and sfB's leaf space may partially overlap. The composition builds
2076 a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
2077 roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
2078 a Reduce on sfB, on connected roots.
2079
2080 .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
2081 @*/
2082 PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
2083 {
2084 PetscErrorCode ierr;
2085 const PetscSFNode *remotePointsA,*remotePointsB;
2086 PetscSFNode *remotePointsBA;
2087 const PetscInt *localPointsA,*localPointsB;
2088 PetscSFNode *reorderedRemotePointsA = NULL;
2089 PetscInt i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
2090 MPI_Op op;
2091 #if defined(PETSC_USE_64BIT_INDICES)
2092 PetscBool iswin;
2093 #endif
2094
2095 PetscFunctionBegin;
2096 PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
2097 PetscSFCheckGraphSet(sfA,1);
2098 PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
2099 PetscSFCheckGraphSet(sfB,2);
2100 PetscCheckSameComm(sfA,1,sfB,2);
2101 PetscValidPointer(sfBA,3);
2102 ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
2103 ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);
2104
2105 ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
2106 ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);
2107
2108 /* TODO: Check roots of sfB have degree of 1 */
2109 /* Once we implement it, we can replace the MPI_MAXLOC
2110 with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
2111 We use MPI_MAXLOC only to have a deterministic output from this routine if
2112 the root condition is not met.
2113 */ 2114 op = MPI_MAXLOC; 2115 #if defined(PETSC_USE_64BIT_INDICES) 2116 /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */ 2117 ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr); 2118 if (iswin) op = MPI_REPLACE; 2119 #endif 2120 2121 ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr); 2122 ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr); 2123 for (i=0; i<maxleaf - minleaf + 1; i++) { 2124 reorderedRemotePointsA[i].rank = -1; 2125 reorderedRemotePointsA[i].index = -1; 2126 } 2127 if (localPointsA) { 2128 for (i=0; i<numLeavesA; i++) { 2129 if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; 2130 reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i]; 2131 } 2132 } else { 2133 for (i=0; i<numLeavesA; i++) { 2134 if (i > maxleaf || i < minleaf) continue; 2135 reorderedRemotePointsA[i - minleaf] = remotePointsA[i]; 2136 } 2137 } 2138 2139 ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr); 2140 ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr); 2141 for (i=0; i<numRootsB; i++) { 2142 remotePointsBA[i].rank = -1; 2143 remotePointsBA[i].index = -1; 2144 } 2145 2146 ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr); 2147 ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr); 2148 ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr); 2149 for (i=0,numLeavesBA=0; i<numRootsB; i++) { 2150 if (remotePointsBA[i].rank == -1) continue; 2151 remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank; 2152 remotePointsBA[numLeavesBA].index = remotePointsBA[i].index; 2153 localPointsBA[numLeavesBA] = i; 2154 numLeavesBA++; 2155 } 2156 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr); 2157 ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr); 2158 ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr); 2159 PetscFunctionReturn(0); 2160 } 2161 2162 /* 2163 PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF 2164 2165 Input Parameters: 2166 . sf - The global PetscSF 2167 2168 Output Parameters: 2169 . 
out - The local PetscSF 2170 */ 2171 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out) 2172 { 2173 MPI_Comm comm; 2174 PetscMPIInt myrank; 2175 const PetscInt *ilocal; 2176 const PetscSFNode *iremote; 2177 PetscInt i,j,nroots,nleaves,lnleaves,*lilocal; 2178 PetscSFNode *liremote; 2179 PetscSF lsf; 2180 PetscErrorCode ierr; 2181 2182 PetscFunctionBegin; 2183 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 2184 if (sf->ops->CreateLocalSF) { 2185 ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr); 2186 } else { 2187 /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */ 2188 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 2189 ierr = MPI_Comm_rank(comm,&myrank);CHKERRMPI(ierr); 2190 2191 /* Find out local edges and build a local SF */ 2192 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 2193 for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;} 2194 ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr); 2195 ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr); 2196 2197 for (i=j=0; i<nleaves; i++) { 2198 if (iremote[i].rank == (PetscInt)myrank) { 2199 lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */ 2200 liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */ 2201 liremote[j].index = iremote[i].index; 2202 j++; 2203 } 2204 } 2205 ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr); 2206 ierr = PetscSFSetFromOptions(lsf);CHKERRQ(ierr); 2207 ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 2208 ierr = PetscSFSetUp(lsf);CHKERRQ(ierr); 2209 *out = lsf; 2210 } 2211 PetscFunctionReturn(0); 2212 } 2213 2214 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */ 2215 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata) 2216 { 2217 PetscErrorCode ierr; 2218 PetscMemType rootmtype,leafmtype; 2219 2220 PetscFunctionBegin; 2221 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 2222 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 2223 ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr); 2224 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 2225 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 2226 if (sf->ops->BcastToZero) { 2227 ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr); 2228 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type"); 2229 ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr); 2230 PetscFunctionReturn(0); 2231 } 2232 2233
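/*
  Illustrative sketch (a comment only, not compiled): a typical user-level sequence combining the routines above.
  The names comm, nroots, nleaves, rootdata and leafdata are placeholders the caller would provide; error checking
  with CHKERRQ() is omitted for brevity.

    PetscSF     sf;
    PetscSFNode *remote;

    PetscSFCreate(comm,&sf);
    PetscMalloc1(nleaves,&remote);
    ... fill remote[i].rank and remote[i].index with the owning process and root index of leaf i ...
    PetscSFSetGraph(sf,nroots,nleaves,NULL,PETSC_OWN_POINTER,remote,PETSC_OWN_POINTER);
    PetscSFSetUp(sf);
    PetscSFBcastBegin(sf,MPIU_SCALAR,rootdata,leafdata,MPI_REPLACE);
    PetscSFBcastEnd(sf,MPIU_SCALAR,rootdata,leafdata,MPI_REPLACE);
    PetscSFReduceBegin(sf,MPIU_SCALAR,leafdata,rootdata,MPI_SUM);
    PetscSFReduceEnd(sf,MPIU_SCALAR,leafdata,rootdata,MPI_SUM);
    PetscSFDestroy(&sf);
*/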