#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/hashseti.h>
#include <petscctable.h>

#if defined(PETSC_HAVE_CUDA)
#include <cuda_runtime.h>
#endif

#if defined(PETSC_HAVE_HIP)
#include <hip/hip_runtime.h>
#endif

/* In debug builds, raise an error if sf's graph has not been set; expands to a no-op otherwise */
#if defined(PETSC_USE_DEBUG)
#  define PetscSFCheckGraphSet(sf,arg) do { \
    if (PetscUnlikely(!(sf)->graphset)) \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif

const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};

/* Classify a pointer as host or device memory. Defaults to host; queries the CUDA/HIP
   runtime only when the corresponding runtime has been initialized and data is non-NULL. */
PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *type)
{
  PetscFunctionBegin;
  PetscValidPointer(type,2);
  *type = PETSC_MEMTYPE_HOST;
#if defined(PETSC_HAVE_CUDA)
  if (PetscCUDAInitialized && data) {
    cudaError_t                  cerr;
    struct cudaPointerAttributes attr;
    enum cudaMemoryType          mtype;
    cerr = cudaPointerGetAttributes(&attr,data); /* Do not check error since before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */
    cudaGetLastError(); /* Reset the last error */
#if (CUDART_VERSION < 10000)
    mtype = attr.memoryType;
#else
    mtype = attr.type; /* field renamed in CUDA 10 */
#endif
    if (cerr == cudaSuccess && mtype == cudaMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
  }
#endif

#if defined(PETSC_HAVE_HIP)
  if (PetscHIPInitialized && data) {
    hipError_t                   cerr;
    struct hipPointerAttribute_t attr;
    enum hipMemoryType           mtype;
    cerr = hipPointerGetAttributes(&attr,data); /* error deliberately not checked, same reason as the CUDA branch */
    hipGetLastError(); /* Reset the last error */
    mtype = attr.memoryType;
    if (cerr == hipSuccess && mtype == hipMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
  }
#endif
  PetscFunctionReturn(0);
}

/*@
   PetscSFCreate - create a star forest communication context

   Collective

   Input Arguments:
.  comm - communicator on which the star forest will operate

   Output Arguments:
.  sf - new star forest context

   Options Database Keys:
+  -sf_type basic    -Use MPI persistent Isend/Irecv for communication (Default)
.  -sf_type window   -Use MPI-3 one-sided window for communication
-  -sf_type neighbor -Use MPI-3 neighborhood collectives for communication

   Level: intermediate

   Notes:
   When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
   MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
   SFs are optimized and they have better performance than general SFs.

.seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
{
  PetscErrorCode ierr;
  PetscSF        b;

  PetscFunctionBegin;
  PetscValidPointer(sf,2);
  ierr = PetscSFInitializePackage();CHKERRQ(ierr);

  ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);

  /* -1 / empty-range sentinels mean "graph not set yet" */
  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_MAX_INT;
  b->maxleaf   = PETSC_MIN_INT;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;
#if defined(PETSC_HAVE_DEVICE)
  b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
  b->use_stream_aware_mpi = PETSC_FALSE;
  b->use_default_stream   = PETSC_TRUE; /* The assumption is true for PETSc internal use of SF */
  /* Set the default */
#if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
  b->backend = PETSCSF_BACKEND_KOKKOS;
#elif defined(PETSC_HAVE_CUDA)
  b->backend = PETSCSF_BACKEND_CUDA;
#elif defined(PETSC_HAVE_HIP)
  b->backend = PETSCSF_BACKEND_HIP;
#endif
#endif
  /* vscat fields are used when this SF backs a VecScatter; -1 means not configured */
  b->vscat.from_n = -1;
  b->vscat.to_n   = -1;
  b->vscat.unit   = MPIU_SCALAR;
  *sf = b;
  PetscFunctionReturn(0);
}

/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Arguments:
.  sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  /* Restore the "graph not set" sentinels used by PetscSFCreate() */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);CHKERRQ(ierr);}
#endif
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRMPI(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRMPI(ierr);}
  /* Break the back-pointer cycle before destroying the multi-SF */
  if (sf->multi) sf->multi->multi = NULL;
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFSetType - Set the PetscSF communication implementation

   Collective on PetscSF

   Input Parameters:
+  sf - the PetscSF context
-  type - a known method

   Options Database Key:
.  -sf_type <type> - Sets the method; use -help for a list
   of available methods (for instance, window, basic, neighbor)

   Notes:
   See "include/petscsf.h" for available methods (for instance)
+    PETSCSFWINDOW - MPI-2/3 one-sided
-    PETSCSFBASIC - basic implementation using MPI-1 two-sided

  Level: intermediate

.seealso: PetscSFType, PetscSFCreate()
@*/
PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
{
  PetscErrorCode ierr,(*r)(PetscSF);
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidCharPointer(type,2);

  ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
  if (match) PetscFunctionReturn(0); /* already this type; nothing to do */

  ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
  if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
  /* Destroy the previous PetscSF implementation context */
  if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
  ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
  ierr = (*r)(sf);CHKERRQ(ierr); /* call the type's constructor to fill sf->ops */
  PetscFunctionReturn(0);
}

/*@C
  PetscSFGetType - Get the PetscSF communication implementation

  Not Collective

  Input Parameter:
. sf - the PetscSF context

  Output Parameter:
. type - the PetscSF type name

  Level: intermediate

.seealso: PetscSFSetType(), PetscSFCreate()
@*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
  PetscValidPointer(type,2);
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}

/*@
   PetscSFDestroy - destroy star forest

   Collective

   Input Arguments:
.  sf - address of star forest

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFReset()
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Only the last reference actually destroys the object */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  ierr = PetscSFDestroy(&(*sf)->vscat.lsf);CHKERRQ(ierr);
  /* unit was MPI_Type_dup'ed only when bs > 1 (see PetscSFDuplicate()) */
  /* NOTE(review): MPI call checked with CHKERRQ; most MPI calls in this file use CHKERRMPI -- confirm intended */
  if ((*sf)->vscat.bs > 1) {ierr = MPI_Type_free(&(*sf)->vscat.unit);CHKERRQ(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Debug-only sanity check: every leaf's remote rank, remote index, and local
   location must be in range. No-op in optimized builds or before the graph is set. */
static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
{
  PetscInt           i, nleaves;
  PetscMPIInt        size;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
  ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr);
  for (i = 0; i < nleaves; i++) {
    const PetscInt rank   = iremote[i].rank;
    const PetscInt remote = iremote[i].index;
    const PetscInt leaf   = ilocal ? ilocal[i] : i; /* NULL ilocal means contiguous leaf locations */
    if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
    if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
    if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetUp - set up communication structures

   Collective

   Input Arguments:
.  sf - star forest communication object

   Level: beginner

.seealso: PetscSFSetFromOptions(), PetscSFSetType()
@*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (sf->setupcalled) PetscFunctionReturn(0); /* idempotent */
  ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr);
  if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);}  /* Zero all sf->ops */
  if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
  /* Install device allocators matching the selected backend (after type SetUp so they are not zeroed) */
#if defined(PETSC_HAVE_CUDA)
  if (sf->backend == PETSCSF_BACKEND_CUDA) {
    sf->ops->Malloc = PetscSFMalloc_Cuda;
    sf->ops->Free   = PetscSFFree_Cuda;
  }
#endif
#if defined(PETSC_HAVE_HIP)
  if (sf->backend == PETSCSF_BACKEND_HIP) {
    sf->ops->Malloc = PetscSFMalloc_HIP;
    sf->ops->Free   = PetscSFFree_HIP;
  }
#endif

/* NOTE(review): stray null preprocessor directive below; harmless but likely unintended */
#
#if defined(PETSC_HAVE_KOKKOS)
  if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
    sf->ops->Malloc = PetscSFMalloc_Kokkos;
    sf->ops->Free   = PetscSFFree_Kokkos;
  }
#endif
  ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
339 /*@ 340 PetscSFSetFromOptions - set PetscSF options using the options database 341 342 Logically Collective 343 344 Input Arguments: 345 . sf - star forest 346 347 Options Database Keys: 348 + -sf_type - implementation type, see PetscSFSetType() 349 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 350 . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also 351 use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true). 352 If true, this option only works with -use_gpu_aware_mpi 1. 353 . -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false). 354 If true, this option only works with -use_gpu_aware_mpi 1. 355 356 - -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos. 357 On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices, 358 the only available is kokkos. 359 360 Level: intermediate 361 @*/ 362 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 363 { 364 PetscSFType deft; 365 char type[256]; 366 PetscErrorCode ierr; 367 PetscBool flg; 368 369 PetscFunctionBegin; 370 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 371 ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr); 372 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 373 ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr); 374 ierr = PetscSFSetType(sf,flg ? 
type : deft);CHKERRQ(ierr); 375 ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr); 376 #if defined(PETSC_HAVE_DEVICE) 377 { 378 char backendstr[32] = {0}; 379 PetscBool isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set; 380 /* Change the defaults set in PetscSFCreate() with command line options */ 381 ierr = PetscOptionsBool("-sf_use_default_stream","Assume SF's input and output root/leafdata is computed on the default stream","PetscSFSetFromOptions",sf->use_default_stream,&sf->use_default_stream,NULL);CHKERRQ(ierr); 382 ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr); 383 ierr = PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);CHKERRQ(ierr); 384 ierr = PetscStrcasecmp("cuda",backendstr,&isCuda);CHKERRQ(ierr); 385 ierr = PetscStrcasecmp("kokkos",backendstr,&isKokkos);CHKERRQ(ierr); 386 ierr = PetscStrcasecmp("hip",backendstr,&isHip);CHKERRQ(ierr); 387 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 388 if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 389 else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 390 else if (isHip) sf->backend = PETSCSF_BACKEND_HIP; 391 else if (set) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr); 392 #elif defined(PETSC_HAVE_KOKKOS) 393 if (set && !isKokkos) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. 
You can only choose kokkos", backendstr); 394 #endif 395 } 396 #endif 397 if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);} 398 ierr = PetscOptionsEnd();CHKERRQ(ierr); 399 PetscFunctionReturn(0); 400 } 401 402 /*@ 403 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 404 405 Logically Collective 406 407 Input Arguments: 408 + sf - star forest 409 - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic) 410 411 Level: advanced 412 413 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin() 414 @*/ 415 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg) 416 { 417 PetscFunctionBegin; 418 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 419 PetscValidLogicalCollectiveBool(sf,flg,2); 420 if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 421 sf->rankorder = flg; 422 PetscFunctionReturn(0); 423 } 424 425 /*@ 426 PetscSFSetGraph - Set a parallel star forest 427 428 Collective 429 430 Input Arguments: 431 + sf - star forest 432 . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 433 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 434 . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced 435 during setup in debug mode) 436 . localmode - copy mode for ilocal 437 . 
iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 438 during setup in debug mode) 439 - remotemode - copy mode for iremote 440 441 Level: intermediate 442 443 Notes: 444 In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode 445 446 Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they 447 encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not 448 needed 449 450 Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf 451 indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode). 452 453 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph() 454 @*/ 455 PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode) 456 { 457 PetscErrorCode ierr; 458 459 PetscFunctionBegin; 460 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 461 if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4); 462 if (nleaves > 0) PetscValidPointer(iremote,6); 463 if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots); 464 if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves); 465 466 if (sf->nroots >= 0) { /* Reset only if graph already set */ 467 ierr = PetscSFReset(sf);CHKERRQ(ierr); 468 } 469 470 ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 471 472 sf->nroots = nroots; 473 sf->nleaves = nleaves; 474 475 if (nleaves && ilocal) { 476 PetscInt i; 477 PetscInt minleaf = PETSC_MAX_INT; 478 PetscInt maxleaf = PETSC_MIN_INT; 479 int contiguous = 1; 480 for (i=0; i<nleaves; i++) { 481 minleaf = PetscMin(minleaf,ilocal[i]); 482 maxleaf = 
PetscMax(maxleaf,ilocal[i]); 483 contiguous &= (ilocal[i] == i); 484 } 485 sf->minleaf = minleaf; 486 sf->maxleaf = maxleaf; 487 if (contiguous) { 488 if (localmode == PETSC_OWN_POINTER) { 489 ierr = PetscFree(ilocal);CHKERRQ(ierr); 490 } 491 ilocal = NULL; 492 } 493 } else { 494 sf->minleaf = 0; 495 sf->maxleaf = nleaves - 1; 496 } 497 498 if (ilocal) { 499 switch (localmode) { 500 case PETSC_COPY_VALUES: 501 ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr); 502 ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr); 503 sf->mine = sf->mine_alloc; 504 break; 505 case PETSC_OWN_POINTER: 506 sf->mine_alloc = (PetscInt*)ilocal; 507 sf->mine = sf->mine_alloc; 508 break; 509 case PETSC_USE_POINTER: 510 sf->mine_alloc = NULL; 511 sf->mine = (PetscInt*)ilocal; 512 break; 513 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode"); 514 } 515 } 516 517 switch (remotemode) { 518 case PETSC_COPY_VALUES: 519 ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr); 520 ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr); 521 sf->remote = sf->remote_alloc; 522 break; 523 case PETSC_OWN_POINTER: 524 sf->remote_alloc = (PetscSFNode*)iremote; 525 sf->remote = sf->remote_alloc; 526 break; 527 case PETSC_USE_POINTER: 528 sf->remote_alloc = NULL; 529 sf->remote = (PetscSFNode*)iremote; 530 break; 531 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode"); 532 } 533 534 ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 535 sf->graphset = PETSC_TRUE; 536 PetscFunctionReturn(0); 537 } 538 539 /*@ 540 PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern 541 542 Collective 543 544 Input Parameters: 545 + sf - The PetscSF 546 . 
map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL) 547 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL 548 549 Notes: 550 It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map. 551 n and N are local and global sizes of x respectively. 552 553 With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to 554 sequential vectors y on all ranks. 555 556 With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a 557 sequential vector y on rank 0. 558 559 In above cases, entries of x are roots and entries of y are leaves. 560 561 With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine 562 creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 563 of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does 564 not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data 565 items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines. 566 567 In this case, roots and leaves are symmetric. 
568 569 Level: intermediate 570 @*/ 571 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern) 572 { 573 MPI_Comm comm; 574 PetscInt n,N,res[2]; 575 PetscMPIInt rank,size; 576 PetscSFType type; 577 PetscErrorCode ierr; 578 579 PetscFunctionBegin; 580 ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr); 581 if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern); 582 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 583 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 584 585 if (pattern == PETSCSF_PATTERN_ALLTOALL) { 586 type = PETSCSFALLTOALL; 587 ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr); 588 ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr); 589 ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr); 590 ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr); 591 } else { 592 ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr); 593 ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr); 594 res[0] = n; 595 res[1] = -n; 596 /* Check if n are same over all ranks so that we can optimize it */ 597 ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 598 if (res[0] == -res[1]) { /* same n */ 599 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 600 } else { 601 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 602 } 603 ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr); 604 } 605 ierr = PetscSFSetType(sf,type);CHKERRQ(ierr); 606 607 sf->pattern = pattern; 608 sf->mine = NULL; /* Contiguous */ 609 610 /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 611 Also set other easy stuff. 
612 */ 613 if (pattern == PETSCSF_PATTERN_ALLGATHER) { 614 sf->nleaves = N; 615 sf->nroots = n; 616 sf->nranks = size; 617 sf->minleaf = 0; 618 sf->maxleaf = N - 1; 619 } else if (pattern == PETSCSF_PATTERN_GATHER) { 620 sf->nleaves = rank ? 0 : N; 621 sf->nroots = n; 622 sf->nranks = rank ? 0 : size; 623 sf->minleaf = 0; 624 sf->maxleaf = rank ? -1 : N - 1; 625 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 626 sf->nleaves = size; 627 sf->nroots = size; 628 sf->nranks = size; 629 sf->minleaf = 0; 630 sf->maxleaf = size - 1; 631 } 632 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 633 sf->graphset = PETSC_TRUE; 634 PetscFunctionReturn(0); 635 } 636 637 /*@ 638 PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map 639 640 Collective 641 642 Input Arguments: 643 644 . sf - star forest to invert 645 646 Output Arguments: 647 . isf - inverse of sf 648 Level: advanced 649 650 Notes: 651 All roots must have degree 1. 652 653 The local space may be a permutation, but cannot be sparse. 
654 655 .seealso: PetscSFSetGraph() 656 @*/ 657 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf) 658 { 659 PetscErrorCode ierr; 660 PetscMPIInt rank; 661 PetscInt i,nroots,nleaves,maxlocal,count,*newilocal; 662 const PetscInt *ilocal; 663 PetscSFNode *roots,*leaves; 664 665 PetscFunctionBegin; 666 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 667 PetscSFCheckGraphSet(sf,1); 668 PetscValidPointer(isf,2); 669 670 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 671 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 672 673 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 674 ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr); 675 for (i=0; i<maxlocal; i++) { 676 leaves[i].rank = rank; 677 leaves[i].index = i; 678 } 679 for (i=0; i <nroots; i++) { 680 roots[i].rank = -1; 681 roots[i].index = -1; 682 } 683 ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr); 684 ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr); 685 686 /* Check whether our leaves are sparse */ 687 for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++; 688 if (count == nroots) newilocal = NULL; 689 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). 
*/ 690 ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr); 691 for (i=0,count=0; i<nroots; i++) { 692 if (roots[i].rank >= 0) { 693 newilocal[count] = i; 694 roots[count].rank = roots[i].rank; 695 roots[count].index = roots[i].index; 696 count++; 697 } 698 } 699 } 700 701 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr); 702 ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr); 703 ierr = PetscFree2(roots,leaves);CHKERRQ(ierr); 704 PetscFunctionReturn(0); 705 } 706 707 /*@ 708 PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph 709 710 Collective 711 712 Input Arguments: 713 + sf - communication object to duplicate 714 - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption) 715 716 Output Arguments: 717 . newsf - new communication object 718 719 Level: beginner 720 721 .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph() 722 @*/ 723 PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf) 724 { 725 PetscSFType type; 726 PetscErrorCode ierr; 727 MPI_Datatype dtype=MPIU_SCALAR; 728 729 PetscFunctionBegin; 730 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 731 PetscValidLogicalCollectiveEnum(sf,opt,2); 732 PetscValidPointer(newsf,3); 733 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr); 734 ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 735 if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);} 736 if (opt == PETSCSF_DUPLICATE_GRAPH) { 737 PetscSFCheckGraphSet(sf,1); 738 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 739 PetscInt nroots,nleaves; 740 const PetscInt *ilocal; 741 const PetscSFNode *iremote; 742 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 743 ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr); 744 } else { 745 ierr = 
PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr); 746 } 747 } 748 /* Since oldtype is committed, so is newtype, according to MPI */ 749 if (sf->vscat.bs > 1) {ierr = MPI_Type_dup(sf->vscat.unit,&dtype);CHKERRQ(ierr);} 750 (*newsf)->vscat.bs = sf->vscat.bs; 751 (*newsf)->vscat.unit = dtype; 752 (*newsf)->vscat.to_n = sf->vscat.to_n; 753 (*newsf)->vscat.from_n = sf->vscat.from_n; 754 /* Do not copy lsf. Build it on demand since it is rarely used */ 755 756 #if defined(PETSC_HAVE_DEVICE) 757 (*newsf)->backend = sf->backend; 758 (*newsf)->use_default_stream = sf->use_default_stream; 759 (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi; 760 (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi; 761 #endif 762 if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);} 763 /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */ 764 PetscFunctionReturn(0); 765 } 766 767 /*@C 768 PetscSFGetGraph - Get the graph specifying a parallel star forest 769 770 Not Collective 771 772 Input Arguments: 773 . sf - star forest 774 775 Output Arguments: 776 + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 777 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 778 . ilocal - locations of leaves in leafdata buffers 779 - iremote - remote locations of root vertices for each leaf on the current process 780 781 Notes: 782 We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet 783 784 When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you 785 want to update the graph, you must call PetscSFSetGraph after modifying the iremote array. 
786 787 Level: intermediate 788 789 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph() 790 @*/ 791 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 792 { 793 PetscErrorCode ierr; 794 795 PetscFunctionBegin; 796 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 797 if (sf->ops->GetGraph) { 798 ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr); 799 } else { 800 if (nroots) *nroots = sf->nroots; 801 if (nleaves) *nleaves = sf->nleaves; 802 if (ilocal) *ilocal = sf->mine; 803 if (iremote) *iremote = sf->remote; 804 } 805 PetscFunctionReturn(0); 806 } 807 808 /*@ 809 PetscSFGetLeafRange - Get the active leaf ranges 810 811 Not Collective 812 813 Input Arguments: 814 . sf - star forest 815 816 Output Arguments: 817 + minleaf - minimum active leaf on this process. Return 0 if there are no leaves. 818 - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves. 819 820 Level: developer 821 822 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph() 823 @*/ 824 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf) 825 { 826 PetscFunctionBegin; 827 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 828 PetscSFCheckGraphSet(sf,1); 829 if (minleaf) *minleaf = sf->minleaf; 830 if (maxleaf) *maxleaf = sf->maxleaf; 831 PetscFunctionReturn(0); 832 } 833 834 /*@C 835 PetscSFViewFromOptions - View from Options 836 837 Collective on PetscSF 838 839 Input Parameters: 840 + A - the star forest 841 . 
obj - Optional object
-  name - command line option

   Level: intermediate
.seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
  /* Thin wrapper: all option parsing/viewer selection is done by the generic object routine */
  ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFView - view a star forest

   Collective

   Input Arguments:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Lazily set up so the rank tables (sf->ranks, sf->roffset, ...) exist for detailed output */
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    /* Let the implementation (basic/window/neighbor) print its own details first */
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
      /* Synchronized printing so per-rank output appears in rank order */
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        /* NULL sf->mine means the leaves are contiguous starting at 0 */
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        /* Sort a copy of the rank list so the detailed listing is deterministic;
           perm maps sorted position back to the original rank slot */
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
.  
roffset - offset in rmine/rremote for each rank (length nranks+1)
.  rmine - concatenated array holding local indices referencing each remote rank
-  rremote - concatenated array holding remote indices referenced for each remote rank

   Level: developer

.seealso: PetscSFGetLeafRanks()
@*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
{
  PetscErrorCode err;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (!sf->ops->GetRootRanks) {
    /* Generic fallback: hand out the tables built by PetscSFSetUpRanks(); each output is optional */
    if (nranks)  *nranks  = sf->nranks;
    if (ranks)   *ranks   = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine)   *rmine   = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  } else {
    err = (*sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(err);
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  niranks - number of leaf ranks referencing roots on this process
.  iranks - array of ranks
.  
ioffset - offset in irootloc for each rank (length niranks+1) 979 - irootloc - concatenated array holding local indices of roots referenced by each leaf rank 980 981 Level: developer 982 983 .seealso: PetscSFGetRootRanks() 984 @*/ 985 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 986 { 987 PetscErrorCode ierr; 988 989 PetscFunctionBegin; 990 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 991 if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 992 if (sf->ops->GetLeafRanks) { 993 ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr); 994 } else { 995 PetscSFType type; 996 ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 997 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 998 } 999 PetscFunctionReturn(0); 1000 } 1001 1002 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) { 1003 PetscInt i; 1004 for (i=0; i<n; i++) { 1005 if (needle == list[i]) return PETSC_TRUE; 1006 } 1007 return PETSC_FALSE; 1008 } 1009 1010 /*@C 1011 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations. 

   Collective

   Input Arguments:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr);
  /* Count, per remote rank, how many of my leaves reference it (hash table keyed by rank) */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank (PetscTable cannot store key 0) */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--; /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group   group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate every rank of dgroup into the SF's communicator numbering */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRMPI(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRMPI(ierr);}
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished.
     Two-pointer in-place partition: i scans backward for a distinguished rank, sf->ndranks scans
     forward for a non-distinguished rank, then the pair is swapped (rcount[] carried along). */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Each partition is sorted separately so lookups can binary-search within its part */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* Prefix-sum the per-rank counts into roffset[]; rcount[] is reset for reuse as a cursor below */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;
  }
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit: consecutive leaves usually reference the same remote rank */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks; try the distinguished part first */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Argument:
.  sf - star forest

   Output Arguments:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  /* Groups are computed lazily and cached on the SF (freed in PetscSFDestroy) */
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks: build an auxiliary SF with one root and one
       leaf per referenced rank, so the root degree equals the number of incoming ranks */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
    /* Enumerate the incoming ranks: every process gathers the rank ids of its referencing processes */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRMPI(ierr);
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* Outgoing group is simply the ranks my leaves reference, already known locally */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRMPI(ierr);
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Argument:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Arguments:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.
1195 1196 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering() 1197 @*/ 1198 PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi) 1199 { 1200 PetscErrorCode ierr; 1201 1202 PetscFunctionBegin; 1203 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1204 PetscValidPointer(multi,2); 1205 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */ 1206 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1207 *multi = sf->multi; 1208 sf->multi->multi = sf->multi; 1209 PetscFunctionReturn(0); 1210 } 1211 if (!sf->multi) { 1212 const PetscInt *indegree; 1213 PetscInt i,*inoffset,*outones,*outoffset,maxlocal; 1214 PetscSFNode *remote; 1215 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 1216 ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr); 1217 ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr); 1218 ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr); 1219 inoffset[0] = 0; 1220 for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i]; 1221 for (i=0; i<maxlocal; i++) outones[i] = 1; 1222 ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1223 ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1224 for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1225 if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */ 1226 for (i=0; i<sf->nroots; i++) { 1227 if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp"); 1228 } 1229 } 1230 ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr); 1231 for (i=0; i<sf->nleaves; i++) { 1232 remote[i].rank = sf->remote[i].rank; 1233 remote[i].index = outoffset[sf->mine ? 
sf->mine[i] : i]; 1234 } 1235 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1236 sf->multi->multi = sf->multi; 1237 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1238 if (sf->rankorder) { /* Sort the ranks */ 1239 PetscMPIInt rank; 1240 PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree; 1241 PetscSFNode *newremote; 1242 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 1243 for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]); 1244 ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr); 1245 for (i=0; i<maxlocal; i++) outranks[i] = rank; 1246 ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr); 1247 ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr); 1248 /* Sort the incoming ranks at each vertex, build the inverse map */ 1249 for (i=0; i<sf->nroots; i++) { 1250 PetscInt j; 1251 for (j=0; j<indegree[i]; j++) tmpoffset[j] = j; 1252 ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr); 1253 for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1254 } 1255 ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr); 1256 ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr); 1257 ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr); 1258 for (i=0; i<sf->nleaves; i++) { 1259 newremote[i].rank = sf->remote[i].rank; 1260 newremote[i].index = newoutoffset[sf->mine ? 
sf->mine[i] : i]; 1261 } 1262 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1263 ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr); 1264 } 1265 ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr); 1266 } 1267 *multi = sf->multi; 1268 PetscFunctionReturn(0); 1269 } 1270 1271 /*@C 1272 PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices 1273 1274 Collective 1275 1276 Input Arguments: 1277 + sf - original star forest 1278 . nselected - number of selected roots on this process 1279 - selected - indices of the selected roots on this process 1280 1281 Output Arguments: 1282 . esf - new star forest 1283 1284 Level: advanced 1285 1286 Note: 1287 To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can 1288 be done by calling PetscSFGetGraph(). 1289 1290 .seealso: PetscSFSetGraph(), PetscSFGetGraph() 1291 @*/ 1292 PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf) 1293 { 1294 PetscInt i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal; 1295 const PetscInt *ilocal; 1296 signed char *rootdata,*leafdata,*leafmem; 1297 const PetscSFNode *iremote; 1298 PetscSFNode *new_iremote; 1299 MPI_Comm comm; 1300 PetscErrorCode ierr; 1301 1302 PetscFunctionBegin; 1303 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1304 PetscSFCheckGraphSet(sf,1); 1305 if (nselected) PetscValidPointer(selected,3); 1306 PetscValidPointer(esf,4); 1307 1308 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1309 ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1310 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1311 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 1312 1313 if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of 
range indices */ 1314 PetscBool dups; 1315 ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr); 1316 if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups"); 1317 for (i=0; i<nselected; i++) 1318 if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %D is out of [0,%D)",selected[i],nroots); 1319 } 1320 1321 if (sf->ops->CreateEmbeddedSF) { 1322 ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,selected,esf);CHKERRQ(ierr); 1323 } else { 1324 /* A generic version of creating embedded sf */ 1325 ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 1326 maxlocal = maxleaf - minleaf + 1; 1327 ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr); 1328 leafdata = leafmem - minleaf; 1329 /* Tag selected roots and bcast to leaves */ 1330 for (i=0; i<nselected; i++) rootdata[selected[i]] = 1; 1331 ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr); 1332 ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr); 1333 1334 /* Build esf with leaves that are still connected */ 1335 esf_nleaves = 0; 1336 for (i=0; i<nleaves; i++) { 1337 j = ilocal ? ilocal[i] : i; 1338 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1339 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1340 */ 1341 esf_nleaves += (leafdata[j] ? 1 : 0); 1342 } 1343 ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr); 1344 ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr); 1345 for (i=n=0; i<nleaves; i++) { 1346 j = ilocal ? 
ilocal[i] : i; 1347 if (leafdata[j]) { 1348 new_ilocal[n] = j; 1349 new_iremote[n].rank = iremote[i].rank; 1350 new_iremote[n].index = iremote[i].index; 1351 ++n; 1352 } 1353 } 1354 ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr); 1355 ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr); 1356 ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1357 ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr); 1358 } 1359 ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1360 PetscFunctionReturn(0); 1361 } 1362 1363 /*@C 1364 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1365 1366 Collective 1367 1368 Input Arguments: 1369 + sf - original star forest 1370 . nselected - number of selected leaves on this process 1371 - selected - indices of the selected leaves on this process 1372 1373 Output Arguments: 1374 . newsf - new star forest 1375 1376 Level: advanced 1377 1378 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph() 1379 @*/ 1380 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1381 { 1382 const PetscSFNode *iremote; 1383 PetscSFNode *new_iremote; 1384 const PetscInt *ilocal; 1385 PetscInt i,nroots,*leaves,*new_ilocal; 1386 MPI_Comm comm; 1387 PetscErrorCode ierr; 1388 1389 PetscFunctionBegin; 1390 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1391 PetscSFCheckGraphSet(sf,1); 1392 if (nselected) PetscValidPointer(selected,3); 1393 PetscValidPointer(newsf,4); 1394 1395 /* Uniq selected[] and put results in leaves[] */ 1396 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1397 ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr); 1398 ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr); 1399 ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr); 1400 if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) 
SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves); 1401 1402 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1403 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) { 1404 ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr); 1405 } else { 1406 ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr); 1407 ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr); 1408 ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr); 1409 for (i=0; i<nselected; ++i) { 1410 const PetscInt l = leaves[i]; 1411 new_ilocal[i] = ilocal ? ilocal[l] : l; 1412 new_iremote[i].rank = iremote[l].rank; 1413 new_iremote[i].index = iremote[l].index; 1414 } 1415 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr); 1416 ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1417 } 1418 ierr = PetscFree(leaves);CHKERRQ(ierr); 1419 PetscFunctionReturn(0); 1420 } 1421 1422 /*@C 1423 PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd() 1424 1425 Collective on PetscSF 1426 1427 Input Arguments: 1428 + sf - star forest on which to communicate 1429 . unit - data type associated with each node 1430 . rootdata - buffer to broadcast 1431 - op - operation to use for reduction 1432 1433 Output Arguments: 1434 . leafdata - buffer to be reduced with values from each leaf's respective root 1435 1436 Level: intermediate 1437 1438 Notes: 1439 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1440 are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1441 use PetscSFBcastAndOpWithMemTypeBegin() instead. 
1442 .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd(), PetscSFBcastAndOpWithMemTypeBegin() 1443 @*/ 1444 PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1445 { 1446 PetscErrorCode ierr; 1447 PetscMemType rootmtype,leafmtype; 1448 1449 PetscFunctionBegin; 1450 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1451 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1452 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);} 1453 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1454 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1455 ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 1456 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);} 1457 PetscFunctionReturn(0); 1458 } 1459 1460 /*@C 1461 PetscSFBcastAndOpWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastAndOpEnd() 1462 1463 Collective on PetscSF 1464 1465 Input Arguments: 1466 + sf - star forest on which to communicate 1467 . unit - data type associated with each node 1468 . rootmtype - memory type of rootdata 1469 . rootdata - buffer to broadcast 1470 . leafmtype - memory type of leafdata 1471 - op - operation to use for reduction 1472 1473 Output Arguments: 1474 . 
leafdata - buffer to be reduced with values from each leaf's respective root 1475 1476 Level: intermediate 1477 1478 .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd(),PetscSFBcastAndOpBegin() 1479 @*/ 1480 PetscErrorCode PetscSFBcastAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 1481 { 1482 PetscErrorCode ierr; 1483 1484 PetscFunctionBegin; 1485 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1486 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1487 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);} 1488 ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 1489 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);} 1490 PetscFunctionReturn(0); 1491 } 1492 1493 /*@C 1494 PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin() 1495 1496 Collective 1497 1498 Input Arguments: 1499 + sf - star forest 1500 . unit - data type 1501 . rootdata - buffer to broadcast 1502 - op - operation to use for reduction 1503 1504 Output Arguments: 1505 . 
leafdata - buffer to be reduced with values from each leaf's respective root 1506 1507 Level: intermediate 1508 1509 .seealso: PetscSFSetGraph(), PetscSFReduceEnd() 1510 @*/ 1511 PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1512 { 1513 PetscErrorCode ierr; 1514 1515 PetscFunctionBegin; 1516 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1517 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);} 1518 ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr); 1519 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);} 1520 PetscFunctionReturn(0); 1521 } 1522 1523 /*@C 1524 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 1525 1526 Collective 1527 1528 Input Arguments: 1529 + sf - star forest 1530 . unit - data type 1531 . leafdata - values to reduce 1532 - op - reduction operation 1533 1534 Output Arguments: 1535 . rootdata - result of reduction of values from all leaves of each root 1536 1537 Level: intermediate 1538 1539 Notes: 1540 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1541 are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1542 use PetscSFReduceWithMemTypeBegin() instead. 
1543 1544 .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin() 1545 @*/ 1546 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1547 { 1548 PetscErrorCode ierr; 1549 PetscMemType rootmtype,leafmtype; 1550 1551 PetscFunctionBegin; 1552 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1553 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1554 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);} 1555 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1556 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1557 ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr); 1558 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);} 1559 PetscFunctionReturn(0); 1560 } 1561 1562 /*@C 1563 PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd() 1564 1565 Collective 1566 1567 Input Arguments: 1568 + sf - star forest 1569 . unit - data type 1570 . leafmtype - memory type of leafdata 1571 . leafdata - values to reduce 1572 . rootmtype - memory type of rootdata 1573 - op - reduction operation 1574 1575 Output Arguments: 1576 . 
rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
@*/
PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
{
  PetscErrorCode err;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  err = PetscSFSetUp(sf);CHKERRQ(err);
  if (!sf->vscat.logging) {err = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(err);}
  /* Caller already knows the memory types; skip the PetscGetMemType() queries */
  err = (*sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(err);
  if (!sf->vscat.logging) {err = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(err);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Arguments:
.  
rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode err;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->vscat.logging) {err = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(err);}
  /* Block until the reduction launched by PetscSFReduceBegin() has completed */
  err = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(err);
  if (!sf->vscat.logging) {err = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(err);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Arguments:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
   integers.
1647 1648 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph() 1649 @*/ 1650 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1651 { 1652 PetscErrorCode ierr; 1653 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1654 1655 PetscFunctionBegin; 1656 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1657 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1658 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1659 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1660 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1661 ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr); 1662 if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1663 ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr); 1664 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1665 PetscFunctionReturn(0); 1666 } 1667 1668 /*@C 1669 PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value 1670 1671 Collective 1672 1673 Input Arguments: 1674 + sf - star forest 1675 . unit - data type 1676 . 
leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Arguments:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  /* Memory types of the buffers were already determined in PetscSFFetchAndOpBegin(); the
     type-specific implementation completes the communication started there */
  ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf-sf->minleaf+1; /* size of the local leaf index range */
    ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    /* Every leaf contributes 1; SUM-reducing those ones onto the roots yields each root's degree */
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* degreetmp[0] corresponds to leaf minleaf, so shift the pointer back by minleaf to let
       the reduce address leaves by their actual leaf index */
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  *degree = NULL; /* the result is only available after PetscSFComputeDegreeEnd() */
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
.seealso: PetscSFComputeDegreeBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    /* degreetmp is non-NULL after a matching Begin (at least one entry is always allocated there) */
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    /* Same pointer shift as in Begin: degreetmp[0] corresponds to leaf minleaf */
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    sf->degreeknown = PETSC_TRUE; /* cache the result; later calls return sf->degree directly */
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
   Each multi-root is assigned index of the corresponding original root.

   Collective

   Input Arguments:
+  sf - star forest
-  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

   Output Arguments:
+  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
-  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

   Level: developer

   Notes:
   The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1794 1795 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF() 1796 @*/ 1797 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1798 { 1799 PetscSF msf; 1800 PetscInt i, j, k, nroots, nmroots; 1801 PetscErrorCode ierr; 1802 1803 PetscFunctionBegin; 1804 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1805 ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 1806 if (nroots) PetscValidIntPointer(degree,2); 1807 if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3); 1808 PetscValidPointer(multiRootsOrigNumbering,4); 1809 ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr); 1810 ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr); 1811 ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr); 1812 for (i=0,j=0,k=0; i<nroots; i++) { 1813 if (!degree[i]) continue; 1814 for (j=0; j<degree[i]; j++,k++) { 1815 (*multiRootsOrigNumbering)[k] = i; 1816 } 1817 } 1818 if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail"); 1819 if (nMultiRoots) *nMultiRoots = nmroots; 1820 PetscFunctionReturn(0); 1821 } 1822 1823 /*@C 1824 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 1825 1826 Collective 1827 1828 Input Arguments: 1829 + sf - star forest 1830 . unit - data type 1831 - leafdata - leaf data to gather to roots 1832 1833 Output Argument: 1834 . 
multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1835 1836 Level: intermediate 1837 1838 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1839 @*/ 1840 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1841 { 1842 PetscErrorCode ierr; 1843 PetscSF multi = NULL; 1844 1845 PetscFunctionBegin; 1846 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1847 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1848 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1849 ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 1850 PetscFunctionReturn(0); 1851 } 1852 1853 /*@C 1854 PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin() 1855 1856 Collective 1857 1858 Input Arguments: 1859 + sf - star forest 1860 . unit - data type 1861 - leafdata - leaf data to gather to roots 1862 1863 Output Argument: 1864 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1865 1866 Level: intermediate 1867 1868 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1869 @*/ 1870 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1871 { 1872 PetscErrorCode ierr; 1873 PetscSF multi = NULL; 1874 1875 PetscFunctionBegin; 1876 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1877 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1878 ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 1879 PetscFunctionReturn(0); 1880 } 1881 1882 /*@C 1883 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd() 1884 1885 Collective 1886 1887 Input Arguments: 1888 + sf - star forest 1889 . unit - data type 1890 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1891 1892 Output Argument: 1893 . 
leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* A scatter is a broadcast over the multi-SF: each leaf receives the unit stored in its own
     dedicated slot of the multi-root buffer */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Complete the multi-SF broadcast started in PetscSFScatterBegin() */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Verify that each local leaf appears at most once in the graph, i.e. the SF is a true star
   forest. Active only in debug builds; a repeated leaf is a usage error. */
static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
{
  PetscInt        i, n, nleaves;
  const PetscInt *ilocal = NULL;
  PetscHSetI      seen;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) {
    ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
    ierr = PetscHSetICreate(&seen);CHKERRQ(ierr);
    for (i = 0; i < nleaves; i++) {
      const PetscInt leaf = ilocal ? ilocal[i] : i; /* NULL ilocal means leaves are 0..nleaves-1 */
      ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
    }
    /* Duplicates collapse in the hash set, so a smaller size exposes them */
    ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
    if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
    ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

   Input Parameters:
+  sfA - The first PetscSF
-  sfB - The second PetscSF

   Output Parameters:
.  sfBA - The composite SF

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e. the same leaf is not connected with different roots.

   sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
   Bcast on sfA, then a Bcast on sfB, on connected nodes.
.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  /* Composition is only well defined on true star forests (each leaf connected to one root) */
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  if (localPointsA) {
    /* Scatter sfA's remote root locations into sfB's root index order so they can be used as
       root data for a broadcast over sfB; rank -1 marks roots of sfB with no incoming edge from sfA */
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* leaf of sfA outside sfB's root space cannot compose */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  /* Broadcast sfA's root locations through sfB down to sfB's leaves; leafdataB is indexed from
     minleaf, hence the pointer shift */
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* denseB: the composed leaves are exactly 0..numLeavesB-1 with no gaps, so ilocal can stay NULL
     and leafdataB can be handed to the new SF as-is */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership of leafdataB transfers to the new SF below */
  } else {
    /* Sparse case: compact connected leaves into freshly allocated arrays */
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue; /* leaf not reachable from any root of sfA */
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

   Input Parameters:
+  sfA - The first PetscSF
-  sfB - The second PetscSF

   Output Parameters:
.  sfBA - The composite SF.

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
   roots are not in sfBA.
Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
   a Reduce on sfB, on connected roots.

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op            op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool         iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  /* Composition is only well defined on true star forests (each leaf connected to one root) */
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPIU_REPLACE. In that case, MPI_MAXLOC and MPIU_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr);
  if (iswin) op = MPIU_REPLACE;
#endif

  /* Lay out sfA's remote root locations in sfB's leaf index order (offset by minleaf);
     rank -1 marks leaves of sfB that receive nothing from sfA */
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; /* leaf of sfA outside sfB's leaf range */
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else { /* NULL ilocal: sfA's leaves are 0..numLeavesA-1 */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1; /* rank -1 will mark roots of sfB unreachable from sfA */
    remotePointsBA[i].index = -1;
  }

  /* Push the (rank,index) pairs up sfB to its roots; the pointer shift matches the
     minleaf-based layout of reorderedRemotePointsA */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* Compact connected roots in place; localPointsBA records the original root index */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

   Input Parameters:
.  sf - The global PetscSF

   Output Parameters:
.  out - The local PetscSF
 */
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm          comm;
  PetscMPIInt       myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt          i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode       *liremote;
  PetscSF           lsf;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->CreateLocalSF) {
    /* Let the SF implementation build it if it knows a faster way */
    ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
  } else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&myrank);CHKERRMPI(ierr);

    /* Find out local edges and build a local SF */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
    /* First pass: count edges whose root lives on this process */
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);

    /* Second pass: copy the intra-process edges, remapping the rank into PETSC_COMM_SELF */
    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(lsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
    *out = lsf;
  }
  PetscFunctionReturn(0);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Determine whether the buffers live on host or device so the implementation can pick the right path */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  if (sf->ops->BcastToZero) {
    ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}