1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/hashseti.h> 3 #include <petscctable.h> 4 5 #if defined(PETSC_HAVE_CUDA) 6 #include <cuda_runtime.h> 7 #endif 8 9 #if defined(PETSC_HAVE_HIP) 10 #include <hip/hip_runtime.h> 11 #endif 12 13 #if defined(PETSC_USE_DEBUG) 14 # define PetscSFCheckGraphSet(sf,arg) do { \ 15 if (PetscUnlikely(!(sf)->graphset)) \ 16 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \ 17 } while (0) 18 #else 19 # define PetscSFCheckGraphSet(sf,arg) do {} while (0) 20 #endif 21 22 const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL}; 23 24 PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *type) 25 { 26 PetscFunctionBegin; 27 PetscValidPointer(type,2); 28 *type = PETSC_MEMTYPE_HOST; 29 #if defined(PETSC_HAVE_CUDA) 30 if (PetscCUDAInitialized && data) { 31 cudaError_t cerr; 32 struct cudaPointerAttributes attr; 33 enum cudaMemoryType mtype; 34 cerr = cudaPointerGetAttributes(&attr,data); /* Do not check error since before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */ 35 cudaGetLastError(); /* Reset the last error */ 36 #if (CUDART_VERSION < 10000) 37 mtype = attr.memoryType; 38 #else 39 mtype = attr.type; 40 #endif 41 if (cerr == cudaSuccess && mtype == cudaMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE; 42 } 43 #endif 44 45 #if defined(PETSC_HAVE_HIP) 46 if (PetscHIPInitialized && data) { 47 hipError_t cerr; 48 struct hipPointerAttribute_t attr; 49 enum hipMemoryType mtype; 50 cerr = hipPointerGetAttributes(&attr,data); 51 hipGetLastError(); /* Reset the last error */ 52 mtype = attr.memoryType; 53 if (cerr == hipSuccess && mtype == hipMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE; 54 } 55 #endif 56 PetscFunctionReturn(0); 57 } 58 59 /*@ 60 PetscSFCreate - 
create a star forest communication context 61 62 Collective 63 64 Input Arguments: 65 . comm - communicator on which the star forest will operate 66 67 Output Arguments: 68 . sf - new star forest context 69 70 Options Database Keys: 71 + -sf_type basic -Use MPI persistent Isend/Irecv for communication (Default) 72 . -sf_type window -Use MPI-3 one-sided window for communication 73 - -sf_type neighbor -Use MPI-3 neighborhood collectives for communication 74 75 Level: intermediate 76 77 Notes: 78 When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv, 79 MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special 80 SFs are optimized and they have better performance than general SFs. 81 82 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy() 83 @*/ 84 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf) 85 { 86 PetscErrorCode ierr; 87 PetscSF b; 88 89 PetscFunctionBegin; 90 PetscValidPointer(sf,2); 91 ierr = PetscSFInitializePackage();CHKERRQ(ierr); 92 93 ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr); 94 95 b->nroots = -1; 96 b->nleaves = -1; 97 b->minleaf = PETSC_MAX_INT; 98 b->maxleaf = PETSC_MIN_INT; 99 b->nranks = -1; 100 b->rankorder = PETSC_TRUE; 101 b->ingroup = MPI_GROUP_NULL; 102 b->outgroup = MPI_GROUP_NULL; 103 b->graphset = PETSC_FALSE; 104 #if defined(PETSC_HAVE_DEVICE) 105 b->use_gpu_aware_mpi = use_gpu_aware_mpi; 106 b->use_stream_aware_mpi = PETSC_FALSE; 107 b->use_default_stream = PETSC_TRUE; /* The assumption is true for PETSc internal use of SF */ 108 /* Set the default */ 109 #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 110 b->backend = PETSCSF_BACKEND_KOKKOS; 111 #elif defined(PETSC_HAVE_CUDA) 112 b->backend = PETSCSF_BACKEND_CUDA; 113 #elif defined(PETSC_HAVE_HIP) 114 b->backend = PETSCSF_BACKEND_HIP; 115 #endif 116 #endif 117 
b->vscat.from_n = -1; 118 b->vscat.to_n = -1; 119 b->vscat.unit = MPIU_SCALAR; 120 *sf = b; 121 PetscFunctionReturn(0); 122 } 123 124 /*@ 125 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 126 127 Collective 128 129 Input Arguments: 130 . sf - star forest 131 132 Level: advanced 133 134 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy() 135 @*/ 136 PetscErrorCode PetscSFReset(PetscSF sf) 137 { 138 PetscErrorCode ierr; 139 140 PetscFunctionBegin; 141 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 142 if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);} 143 sf->nroots = -1; 144 sf->nleaves = -1; 145 sf->minleaf = PETSC_MAX_INT; 146 sf->maxleaf = PETSC_MIN_INT; 147 sf->mine = NULL; 148 sf->remote = NULL; 149 sf->graphset = PETSC_FALSE; 150 ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr); 151 ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr); 152 sf->nranks = -1; 153 ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr); 154 #if defined(PETSC_HAVE_DEVICE) 155 for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);CHKERRQ(ierr);} 156 #endif 157 sf->degreeknown = PETSC_FALSE; 158 ierr = PetscFree(sf->degree);CHKERRQ(ierr); 159 if (sf->ingroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRMPI(ierr);} 160 if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRMPI(ierr);} 161 if (sf->multi) sf->multi->multi = NULL; 162 ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr); 163 ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr); 164 sf->setupcalled = PETSC_FALSE; 165 PetscFunctionReturn(0); 166 } 167 168 /*@C 169 PetscSFSetType - Set the PetscSF communication implementation 170 171 Collective on PetscSF 172 173 Input Parameters: 174 + sf - the PetscSF context 175 - type - a known method 176 177 Options Database Key: 178 . 
-sf_type <type> - Sets the method; use -help for a list 179 of available methods (for instance, window, basic, neighbor) 180 181 Notes: 182 See "include/petscsf.h" for available methods (for instance) 183 + PETSCSFWINDOW - MPI-2/3 one-sided 184 - PETSCSFBASIC - basic implementation using MPI-1 two-sided 185 186 Level: intermediate 187 188 .seealso: PetscSFType, PetscSFCreate() 189 @*/ 190 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type) 191 { 192 PetscErrorCode ierr,(*r)(PetscSF); 193 PetscBool match; 194 195 PetscFunctionBegin; 196 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 197 PetscValidCharPointer(type,2); 198 199 ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr); 200 if (match) PetscFunctionReturn(0); 201 202 ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr); 203 if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type); 204 /* Destroy the previous PetscSF implementation context */ 205 if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);} 206 ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr); 207 ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr); 208 ierr = (*r)(sf);CHKERRQ(ierr); 209 PetscFunctionReturn(0); 210 } 211 212 /*@C 213 PetscSFGetType - Get the PetscSF communication implementation 214 215 Not Collective 216 217 Input Parameter: 218 . sf - the PetscSF context 219 220 Output Parameter: 221 . type - the PetscSF type name 222 223 Level: intermediate 224 225 .seealso: PetscSFSetType(), PetscSFCreate() 226 @*/ 227 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 228 { 229 PetscFunctionBegin; 230 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1); 231 PetscValidPointer(type,2); 232 *type = ((PetscObject)sf)->type_name; 233 PetscFunctionReturn(0); 234 } 235 236 /*@ 237 PetscSFDestroy - destroy star forest 238 239 Collective 240 241 Input Arguments: 242 . 
sf - address of star forest 243 244 Level: intermediate 245 246 .seealso: PetscSFCreate(), PetscSFReset() 247 @*/ 248 PetscErrorCode PetscSFDestroy(PetscSF *sf) 249 { 250 PetscErrorCode ierr; 251 252 PetscFunctionBegin; 253 if (!*sf) PetscFunctionReturn(0); 254 PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1); 255 if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);} 256 ierr = PetscSFReset(*sf);CHKERRQ(ierr); 257 if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);} 258 ierr = PetscSFDestroy(&(*sf)->vscat.lsf);CHKERRQ(ierr); 259 if ((*sf)->vscat.bs > 1) {ierr = MPI_Type_free(&(*sf)->vscat.unit);CHKERRQ(ierr);} 260 ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr); 261 PetscFunctionReturn(0); 262 } 263 264 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 265 { 266 PetscInt i, nleaves; 267 PetscMPIInt size; 268 const PetscInt *ilocal; 269 const PetscSFNode *iremote; 270 PetscErrorCode ierr; 271 272 PetscFunctionBegin; 273 if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0); 274 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 275 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr); 276 for (i = 0; i < nleaves; i++) { 277 const PetscInt rank = iremote[i].rank; 278 const PetscInt remote = iremote[i].index; 279 const PetscInt leaf = ilocal ? 
ilocal[i] : i; 280 if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size); 281 if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i); 282 if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i); 283 } 284 PetscFunctionReturn(0); 285 } 286 287 /*@ 288 PetscSFSetUp - set up communication structures 289 290 Collective 291 292 Input Arguments: 293 . sf - star forest communication object 294 295 Level: beginner 296 297 .seealso: PetscSFSetFromOptions(), PetscSFSetType() 298 @*/ 299 PetscErrorCode PetscSFSetUp(PetscSF sf) 300 { 301 PetscErrorCode ierr; 302 303 PetscFunctionBegin; 304 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 305 PetscSFCheckGraphSet(sf,1); 306 if (sf->setupcalled) PetscFunctionReturn(0); 307 ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 308 ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr); 309 if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} /* Zero all sf->ops */ 310 if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);} 311 #if defined(PETSC_HAVE_CUDA) 312 if (sf->backend == PETSCSF_BACKEND_CUDA) { 313 sf->ops->Malloc = PetscSFMalloc_Cuda; 314 sf->ops->Free = PetscSFFree_Cuda; 315 } 316 #endif 317 #if defined(PETSC_HAVE_HIP) 318 if (sf->backend == PETSCSF_BACKEND_HIP) { 319 sf->ops->Malloc = PetscSFMalloc_HIP; 320 sf->ops->Free = PetscSFFree_HIP; 321 } 322 #endif 323 324 # 325 #if defined(PETSC_HAVE_KOKKOS) 326 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 327 sf->ops->Malloc = PetscSFMalloc_Kokkos; 328 sf->ops->Free = PetscSFFree_Kokkos; 329 } 330 #endif 331 ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 332 sf->setupcalled = PETSC_TRUE; 333 PetscFunctionReturn(0); 334 } 335 336 /*@ 337 
PetscSFSetFromOptions - set PetscSF options using the options database 338 339 Logically Collective 340 341 Input Arguments: 342 . sf - star forest 343 344 Options Database Keys: 345 + -sf_type - implementation type, see PetscSFSetType() 346 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 347 . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also 348 use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true). 349 If true, this option only works with -use_gpu_aware_mpi 1. 350 . -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false). 351 If true, this option only works with -use_gpu_aware_mpi 1. 352 353 - -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos. 354 On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices, 355 the only available is kokkos. 356 357 Level: intermediate 358 @*/ 359 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 360 { 361 PetscSFType deft; 362 char type[256]; 363 PetscErrorCode ierr; 364 PetscBool flg; 365 366 PetscFunctionBegin; 367 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 368 ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr); 369 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 370 ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr); 371 ierr = PetscSFSetType(sf,flg ? 
type : deft);CHKERRQ(ierr); 372 ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr); 373 #if defined(PETSC_HAVE_DEVICE) 374 { 375 char backendstr[32] = {0}; 376 PetscBool isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set; 377 /* Change the defaults set in PetscSFCreate() with command line options */ 378 ierr = PetscOptionsBool("-sf_use_default_stream","Assume SF's input and output root/leafdata is computed on the default stream","PetscSFSetFromOptions",sf->use_default_stream,&sf->use_default_stream,NULL);CHKERRQ(ierr); 379 ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr); 380 ierr = PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);CHKERRQ(ierr); 381 ierr = PetscStrcasecmp("cuda",backendstr,&isCuda);CHKERRQ(ierr); 382 ierr = PetscStrcasecmp("kokkos",backendstr,&isKokkos);CHKERRQ(ierr); 383 ierr = PetscStrcasecmp("hip",backendstr,&isHip);CHKERRQ(ierr); 384 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 385 if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 386 else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 387 else if (isHip) sf->backend = PETSCSF_BACKEND_HIP; 388 else if (set) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr); 389 #elif defined(PETSC_HAVE_KOKKOS) 390 if (set && !isKokkos) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. 
You can only choose kokkos", backendstr); 391 #endif 392 } 393 #endif 394 if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);} 395 ierr = PetscOptionsEnd();CHKERRQ(ierr); 396 PetscFunctionReturn(0); 397 } 398 399 /*@ 400 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 401 402 Logically Collective 403 404 Input Arguments: 405 + sf - star forest 406 - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic) 407 408 Level: advanced 409 410 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin() 411 @*/ 412 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg) 413 { 414 PetscFunctionBegin; 415 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 416 PetscValidLogicalCollectiveBool(sf,flg,2); 417 if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 418 sf->rankorder = flg; 419 PetscFunctionReturn(0); 420 } 421 422 /*@ 423 PetscSFSetGraph - Set a parallel star forest 424 425 Collective 426 427 Input Arguments: 428 + sf - star forest 429 . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 430 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 431 . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced 432 during setup in debug mode) 433 . localmode - copy mode for ilocal 434 . 
iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 435 during setup in debug mode) 436 - remotemode - copy mode for iremote 437 438 Level: intermediate 439 440 Notes: 441 In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode 442 443 Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they 444 encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not 445 needed 446 447 Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf 448 indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode). 449 450 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph() 451 @*/ 452 PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode) 453 { 454 PetscErrorCode ierr; 455 456 PetscFunctionBegin; 457 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 458 if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4); 459 if (nleaves > 0) PetscValidPointer(iremote,6); 460 if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots); 461 if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves); 462 463 if (sf->nroots >= 0) { /* Reset only if graph already set */ 464 ierr = PetscSFReset(sf);CHKERRQ(ierr); 465 } 466 467 ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 468 469 sf->nroots = nroots; 470 sf->nleaves = nleaves; 471 472 if (nleaves && ilocal) { 473 PetscInt i; 474 PetscInt minleaf = PETSC_MAX_INT; 475 PetscInt maxleaf = PETSC_MIN_INT; 476 int contiguous = 1; 477 for (i=0; i<nleaves; i++) { 478 minleaf = PetscMin(minleaf,ilocal[i]); 479 maxleaf = 
PetscMax(maxleaf,ilocal[i]); 480 contiguous &= (ilocal[i] == i); 481 } 482 sf->minleaf = minleaf; 483 sf->maxleaf = maxleaf; 484 if (contiguous) { 485 if (localmode == PETSC_OWN_POINTER) { 486 ierr = PetscFree(ilocal);CHKERRQ(ierr); 487 } 488 ilocal = NULL; 489 } 490 } else { 491 sf->minleaf = 0; 492 sf->maxleaf = nleaves - 1; 493 } 494 495 if (ilocal) { 496 switch (localmode) { 497 case PETSC_COPY_VALUES: 498 ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr); 499 ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr); 500 sf->mine = sf->mine_alloc; 501 break; 502 case PETSC_OWN_POINTER: 503 sf->mine_alloc = (PetscInt*)ilocal; 504 sf->mine = sf->mine_alloc; 505 break; 506 case PETSC_USE_POINTER: 507 sf->mine_alloc = NULL; 508 sf->mine = (PetscInt*)ilocal; 509 break; 510 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode"); 511 } 512 } 513 514 switch (remotemode) { 515 case PETSC_COPY_VALUES: 516 ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr); 517 ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr); 518 sf->remote = sf->remote_alloc; 519 break; 520 case PETSC_OWN_POINTER: 521 sf->remote_alloc = (PetscSFNode*)iremote; 522 sf->remote = sf->remote_alloc; 523 break; 524 case PETSC_USE_POINTER: 525 sf->remote_alloc = NULL; 526 sf->remote = (PetscSFNode*)iremote; 527 break; 528 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode"); 529 } 530 531 ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 532 sf->graphset = PETSC_TRUE; 533 PetscFunctionReturn(0); 534 } 535 536 /*@ 537 PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern 538 539 Collective 540 541 Input Parameters: 542 + sf - The PetscSF 543 . 
map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL) 544 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL 545 546 Notes: 547 It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map. 548 n and N are local and global sizes of x respectively. 549 550 With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to 551 sequential vectors y on all ranks. 552 553 With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a 554 sequential vector y on rank 0. 555 556 In above cases, entries of x are roots and entries of y are leaves. 557 558 With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine 559 creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 560 of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does 561 not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data 562 items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines. 563 564 In this case, roots and leaves are symmetric. 
565 566 Level: intermediate 567 @*/ 568 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern) 569 { 570 MPI_Comm comm; 571 PetscInt n,N,res[2]; 572 PetscMPIInt rank,size; 573 PetscSFType type; 574 PetscErrorCode ierr; 575 576 PetscFunctionBegin; 577 ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr); 578 if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern); 579 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 580 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 581 582 if (pattern == PETSCSF_PATTERN_ALLTOALL) { 583 type = PETSCSFALLTOALL; 584 ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr); 585 ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr); 586 ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr); 587 ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr); 588 } else { 589 ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr); 590 ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr); 591 res[0] = n; 592 res[1] = -n; 593 /* Check if n are same over all ranks so that we can optimize it */ 594 ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 595 if (res[0] == -res[1]) { /* same n */ 596 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 597 } else { 598 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 599 } 600 ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr); 601 } 602 ierr = PetscSFSetType(sf,type);CHKERRQ(ierr); 603 604 sf->pattern = pattern; 605 sf->mine = NULL; /* Contiguous */ 606 607 /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 608 Also set other easy stuff. 
609 */ 610 if (pattern == PETSCSF_PATTERN_ALLGATHER) { 611 sf->nleaves = N; 612 sf->nroots = n; 613 sf->nranks = size; 614 sf->minleaf = 0; 615 sf->maxleaf = N - 1; 616 } else if (pattern == PETSCSF_PATTERN_GATHER) { 617 sf->nleaves = rank ? 0 : N; 618 sf->nroots = n; 619 sf->nranks = rank ? 0 : size; 620 sf->minleaf = 0; 621 sf->maxleaf = rank ? -1 : N - 1; 622 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 623 sf->nleaves = size; 624 sf->nroots = size; 625 sf->nranks = size; 626 sf->minleaf = 0; 627 sf->maxleaf = size - 1; 628 } 629 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 630 sf->graphset = PETSC_TRUE; 631 PetscFunctionReturn(0); 632 } 633 634 /*@ 635 PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map 636 637 Collective 638 639 Input Arguments: 640 641 . sf - star forest to invert 642 643 Output Arguments: 644 . isf - inverse of sf 645 Level: advanced 646 647 Notes: 648 All roots must have degree 1. 649 650 The local space may be a permutation, but cannot be sparse. 
651 652 .seealso: PetscSFSetGraph() 653 @*/ 654 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf) 655 { 656 PetscErrorCode ierr; 657 PetscMPIInt rank; 658 PetscInt i,nroots,nleaves,maxlocal,count,*newilocal; 659 const PetscInt *ilocal; 660 PetscSFNode *roots,*leaves; 661 662 PetscFunctionBegin; 663 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 664 PetscSFCheckGraphSet(sf,1); 665 PetscValidPointer(isf,2); 666 667 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 668 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 669 670 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 671 ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr); 672 for (i=0; i<maxlocal; i++) { 673 leaves[i].rank = rank; 674 leaves[i].index = i; 675 } 676 for (i=0; i <nroots; i++) { 677 roots[i].rank = -1; 678 roots[i].index = -1; 679 } 680 ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr); 681 ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr); 682 683 /* Check whether our leaves are sparse */ 684 for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++; 685 if (count == nroots) newilocal = NULL; 686 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). 
*/ 687 ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr); 688 for (i=0,count=0; i<nroots; i++) { 689 if (roots[i].rank >= 0) { 690 newilocal[count] = i; 691 roots[count].rank = roots[i].rank; 692 roots[count].index = roots[i].index; 693 count++; 694 } 695 } 696 } 697 698 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr); 699 ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr); 700 ierr = PetscFree2(roots,leaves);CHKERRQ(ierr); 701 PetscFunctionReturn(0); 702 } 703 704 /*@ 705 PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph 706 707 Collective 708 709 Input Arguments: 710 + sf - communication object to duplicate 711 - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption) 712 713 Output Arguments: 714 . newsf - new communication object 715 716 Level: beginner 717 718 .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph() 719 @*/ 720 PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf) 721 { 722 PetscSFType type; 723 PetscErrorCode ierr; 724 MPI_Datatype dtype=MPIU_SCALAR; 725 726 PetscFunctionBegin; 727 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 728 PetscValidLogicalCollectiveEnum(sf,opt,2); 729 PetscValidPointer(newsf,3); 730 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr); 731 ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 732 if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);} 733 if (opt == PETSCSF_DUPLICATE_GRAPH) { 734 PetscSFCheckGraphSet(sf,1); 735 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 736 PetscInt nroots,nleaves; 737 const PetscInt *ilocal; 738 const PetscSFNode *iremote; 739 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 740 ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr); 741 } else { 742 ierr = 
PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr); 743 } 744 } 745 /* Since oldtype is committed, so is newtype, according to MPI */ 746 if (sf->vscat.bs > 1) {ierr = MPI_Type_dup(sf->vscat.unit,&dtype);CHKERRQ(ierr);} 747 (*newsf)->vscat.bs = sf->vscat.bs; 748 (*newsf)->vscat.unit = dtype; 749 (*newsf)->vscat.to_n = sf->vscat.to_n; 750 (*newsf)->vscat.from_n = sf->vscat.from_n; 751 /* Do not copy lsf. Build it on demand since it is rarely used */ 752 753 #if defined(PETSC_HAVE_DEVICE) 754 (*newsf)->backend = sf->backend; 755 (*newsf)->use_default_stream = sf->use_default_stream; 756 (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi; 757 (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi; 758 #endif 759 if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);} 760 /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */ 761 PetscFunctionReturn(0); 762 } 763 764 /*@C 765 PetscSFGetGraph - Get the graph specifying a parallel star forest 766 767 Not Collective 768 769 Input Arguments: 770 . sf - star forest 771 772 Output Arguments: 773 + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 774 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 775 . ilocal - locations of leaves in leafdata buffers 776 - iremote - remote locations of root vertices for each leaf on the current process 777 778 Notes: 779 We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet 780 781 When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you 782 want to update the graph, you must call PetscSFSetGraph after modifying the iremote array. 
783 784 Level: intermediate 785 786 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph() 787 @*/ 788 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 789 { 790 PetscErrorCode ierr; 791 792 PetscFunctionBegin; 793 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 794 if (sf->ops->GetGraph) { 795 ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr); 796 } else { 797 if (nroots) *nroots = sf->nroots; 798 if (nleaves) *nleaves = sf->nleaves; 799 if (ilocal) *ilocal = sf->mine; 800 if (iremote) *iremote = sf->remote; 801 } 802 PetscFunctionReturn(0); 803 } 804 805 /*@ 806 PetscSFGetLeafRange - Get the active leaf ranges 807 808 Not Collective 809 810 Input Arguments: 811 . sf - star forest 812 813 Output Arguments: 814 + minleaf - minimum active leaf on this process. Return 0 if there are no leaves. 815 - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves. 816 817 Level: developer 818 819 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph() 820 @*/ 821 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf) 822 { 823 PetscFunctionBegin; 824 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 825 PetscSFCheckGraphSet(sf,1); 826 if (minleaf) *minleaf = sf->minleaf; 827 if (maxleaf) *maxleaf = sf->maxleaf; 828 PetscFunctionReturn(0); 829 } 830 831 /*@C 832 PetscSFViewFromOptions - View from Options 833 834 Collective on PetscSF 835 836 Input Parameters: 837 + A - the star forest 838 . 
obj - Optional object
-  name - command line option

   Level: intermediate

.seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
  /* Delegate to the generic object machinery, which consults the options database for the viewer choice */
  ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFView - view a star forest

   Collective

   Input Arguments:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Setting up ensures ranks/roffset/rmine/rremote are valid for the detailed output below */
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
      /* Synchronized printing so each rank's edges appear in rank order */
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        /* When sf->mine is NULL the leaves are contiguous starting at 0 */
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        /* Sort a copy of the rank list so output is ordered by rank; perm maps back to original slots */
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
.  roffset - offset in rmine/rremote for each rank (length nranks+1)
.  rmine - concatenated array holding local indices referencing each remote rank
-  rremote - concatenated array holding remote indices referenced for each remote rank

   Level: developer

.seealso: PetscSFGetLeafRanks()
@*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetRootRanks) {
    /* Implementation-provided override */
    ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
  } else {
    /* The generic implementation: hand out the arrays built by PetscSFSetUpRanks(); each output is optional */
    if (nranks)  *nranks  = sf->nranks;
    if (ranks)   *ranks   = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine)   *rmine   = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  niranks - number of leaf ranks referencing roots on this process
.  iranks - array of ranks
.  ioffset - offset in irootloc for each rank (length niranks+1)
-  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetLeafRanks) {
    ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
  } else {
    /* No generic fallback exists for leaf ranks; report which type lacks support */
    PetscSFType type;
    ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
  }
  PetscFunctionReturn(0);
}

/* Linear membership test; fine because the distinguished-rank list is expected to be small */
static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
  PetscInt i;
  for (i=0; i<n; i++) {
    if (needle == list[i]) return PETSC_TRUE;
  }
  return PETSC_FALSE;
}

/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

   Collective

   Input Arguments:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr);
  /* Count leaves per remote rank with a hash table keyed by rank */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank (PetscTable cannot store key 0) */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--; /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group   group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate the distinguished group's members into ranks of the SF's communicator */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRMPI(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRMPI(ierr);}
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished,
     using a two-pointer swap: scan i backward for a distinguished rank, ndranks forward for a
     non-distinguished one, and exchange them until the pointers cross */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank = ranks[i];
      tmpcount = rcount[i];
      ranks[i] = ranks[sf->ndranks];
      rcount[i] = rcount[sf->ndranks];
      ranks[sf->ndranks] = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition independently so lookups can binary-search within a partition */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i] = 0; /* reused below as a per-rank fill cursor */
  }
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit: consecutive leaves often target the same rank, so reuse the last lookup */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks: first the distinguished partition, then the rest */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Argument:
.  sf - star forest

   Output Arguments:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks: build an auxiliary SF with one leaf per referenced
       rank, all pointing at local root 0, so the root degree equals the incoming-rank count */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
    /* Enumerate the incoming ranks: each process gathers the rank ids of everyone pointing at it */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRMPI(ierr);
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* Outgoing ranks are already known locally: they are exactly sf->ranks */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRMPI(ierr);
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Argument:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Arguments:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.
1192 1193 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering() 1194 @*/ 1195 PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi) 1196 { 1197 PetscErrorCode ierr; 1198 1199 PetscFunctionBegin; 1200 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1201 PetscValidPointer(multi,2); 1202 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */ 1203 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1204 *multi = sf->multi; 1205 sf->multi->multi = sf->multi; 1206 PetscFunctionReturn(0); 1207 } 1208 if (!sf->multi) { 1209 const PetscInt *indegree; 1210 PetscInt i,*inoffset,*outones,*outoffset,maxlocal; 1211 PetscSFNode *remote; 1212 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 1213 ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr); 1214 ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr); 1215 ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr); 1216 inoffset[0] = 0; 1217 for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i]; 1218 for (i=0; i<maxlocal; i++) outones[i] = 1; 1219 ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1220 ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1221 for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1222 if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */ 1223 for (i=0; i<sf->nroots; i++) { 1224 if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp"); 1225 } 1226 } 1227 ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr); 1228 for (i=0; i<sf->nleaves; i++) { 1229 remote[i].rank = sf->remote[i].rank; 1230 remote[i].index = outoffset[sf->mine ? 
sf->mine[i] : i]; 1231 } 1232 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1233 sf->multi->multi = sf->multi; 1234 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1235 if (sf->rankorder) { /* Sort the ranks */ 1236 PetscMPIInt rank; 1237 PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree; 1238 PetscSFNode *newremote; 1239 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 1240 for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]); 1241 ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr); 1242 for (i=0; i<maxlocal; i++) outranks[i] = rank; 1243 ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr); 1244 ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr); 1245 /* Sort the incoming ranks at each vertex, build the inverse map */ 1246 for (i=0; i<sf->nroots; i++) { 1247 PetscInt j; 1248 for (j=0; j<indegree[i]; j++) tmpoffset[j] = j; 1249 ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr); 1250 for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1251 } 1252 ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr); 1253 ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr); 1254 ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr); 1255 for (i=0; i<sf->nleaves; i++) { 1256 newremote[i].rank = sf->remote[i].rank; 1257 newremote[i].index = newoutoffset[sf->mine ? 
sf->mine[i] : i]; 1258 } 1259 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1260 ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr); 1261 } 1262 ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr); 1263 } 1264 *multi = sf->multi; 1265 PetscFunctionReturn(0); 1266 } 1267 1268 /*@C 1269 PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices 1270 1271 Collective 1272 1273 Input Arguments: 1274 + sf - original star forest 1275 . nselected - number of selected roots on this process 1276 - selected - indices of the selected roots on this process 1277 1278 Output Arguments: 1279 . esf - new star forest 1280 1281 Level: advanced 1282 1283 Note: 1284 To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can 1285 be done by calling PetscSFGetGraph(). 1286 1287 .seealso: PetscSFSetGraph(), PetscSFGetGraph() 1288 @*/ 1289 PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf) 1290 { 1291 PetscInt i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal; 1292 const PetscInt *ilocal; 1293 signed char *rootdata,*leafdata,*leafmem; 1294 const PetscSFNode *iremote; 1295 PetscSFNode *new_iremote; 1296 MPI_Comm comm; 1297 PetscErrorCode ierr; 1298 1299 PetscFunctionBegin; 1300 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1301 PetscSFCheckGraphSet(sf,1); 1302 if (nselected) PetscValidPointer(selected,3); 1303 PetscValidPointer(esf,4); 1304 1305 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1306 ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1307 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1308 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 1309 1310 if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of 
range indices */ 1311 PetscBool dups; 1312 ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr); 1313 if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups"); 1314 for (i=0; i<nselected; i++) 1315 if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %D is out of [0,%D)",selected[i],nroots); 1316 } 1317 1318 if (sf->ops->CreateEmbeddedSF) { 1319 ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,selected,esf);CHKERRQ(ierr); 1320 } else { 1321 /* A generic version of creating embedded sf */ 1322 ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 1323 maxlocal = maxleaf - minleaf + 1; 1324 ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr); 1325 leafdata = leafmem - minleaf; 1326 /* Tag selected roots and bcast to leaves */ 1327 for (i=0; i<nselected; i++) rootdata[selected[i]] = 1; 1328 ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr); 1329 ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr); 1330 1331 /* Build esf with leaves that are still connected */ 1332 esf_nleaves = 0; 1333 for (i=0; i<nleaves; i++) { 1334 j = ilocal ? ilocal[i] : i; 1335 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1336 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1337 */ 1338 esf_nleaves += (leafdata[j] ? 1 : 0); 1339 } 1340 ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr); 1341 ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr); 1342 for (i=n=0; i<nleaves; i++) { 1343 j = ilocal ? 
ilocal[i] : i; 1344 if (leafdata[j]) { 1345 new_ilocal[n] = j; 1346 new_iremote[n].rank = iremote[i].rank; 1347 new_iremote[n].index = iremote[i].index; 1348 ++n; 1349 } 1350 } 1351 ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr); 1352 ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr); 1353 ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1354 ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr); 1355 } 1356 ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1357 PetscFunctionReturn(0); 1358 } 1359 1360 /*@C 1361 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1362 1363 Collective 1364 1365 Input Arguments: 1366 + sf - original star forest 1367 . nselected - number of selected leaves on this process 1368 - selected - indices of the selected leaves on this process 1369 1370 Output Arguments: 1371 . newsf - new star forest 1372 1373 Level: advanced 1374 1375 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph() 1376 @*/ 1377 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1378 { 1379 const PetscSFNode *iremote; 1380 PetscSFNode *new_iremote; 1381 const PetscInt *ilocal; 1382 PetscInt i,nroots,*leaves,*new_ilocal; 1383 MPI_Comm comm; 1384 PetscErrorCode ierr; 1385 1386 PetscFunctionBegin; 1387 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1388 PetscSFCheckGraphSet(sf,1); 1389 if (nselected) PetscValidPointer(selected,3); 1390 PetscValidPointer(newsf,4); 1391 1392 /* Uniq selected[] and put results in leaves[] */ 1393 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1394 ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr); 1395 ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr); 1396 ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr); 1397 if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) 
SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves); 1398 1399 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1400 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) { 1401 ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr); 1402 } else { 1403 ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr); 1404 ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr); 1405 ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr); 1406 for (i=0; i<nselected; ++i) { 1407 const PetscInt l = leaves[i]; 1408 new_ilocal[i] = ilocal ? ilocal[l] : l; 1409 new_iremote[i].rank = iremote[l].rank; 1410 new_iremote[i].index = iremote[l].index; 1411 } 1412 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr); 1413 ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1414 } 1415 ierr = PetscFree(leaves);CHKERRQ(ierr); 1416 PetscFunctionReturn(0); 1417 } 1418 1419 /*@C 1420 PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd() 1421 1422 Collective on PetscSF 1423 1424 Input Arguments: 1425 + sf - star forest on which to communicate 1426 . unit - data type associated with each node 1427 . rootdata - buffer to broadcast 1428 - op - operation to use for reduction 1429 1430 Output Arguments: 1431 . leafdata - buffer to be reduced with values from each leaf's respective root 1432 1433 Level: intermediate 1434 1435 Notes: 1436 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1437 are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1438 use PetscSFBcastAndOpWithMemTypeBegin() instead. 

.seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd(), PetscSFBcastAndOpWithMemTypeBegin()
@*/
PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* NOTE(review): logging is presumably suppressed when this SF is driven by a VecScatter that logs its own events -- confirm */
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);}
  /* Detect host vs. device residency of the buffers before dispatching to the implementation */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastAndOpWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastAndOpEnd()

   Collective on PetscSF

   Input Arguments:
+  sf - star forest on which to communicate
.  unit - data type associated with each node
.  rootmtype - memory type of rootdata
.  rootdata - buffer to broadcast
.  leafmtype - memory type of leafdata
-  op - operation to use for reduction

   Output Arguments:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd(),PetscSFBcastAndOpBegin()
@*/
PetscErrorCode PetscSFBcastAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);}
  /* Caller supplied the memory types, so skip the (potentially costly) pointer-attribute queries */
  ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Arguments:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFReduceEnd()
@*/
PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);}
  ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Arguments:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

   Notes:
   When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
   are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should
   use PetscSFReduceWithMemTypeBegin() instead.

.seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
@*/
PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* NOTE(review): logging is presumably suppressed when this SF is driven by a VecScatter that logs its own events -- confirm */
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
  /* Detect host vs. device residency of the buffers before dispatching to the implementation */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafmtype - memory type of leafdata
.  leafdata - values to reduce
.  rootmtype - memory type of rootdata
-  op - reduction operation

   Output Arguments:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
@*/
PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
  /* Caller supplied the memory types, so skip the (potentially costly) pointer-attribute queries */
  ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Arguments:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);}
  ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Arguments:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
   integers.
1644 1645 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph() 1646 @*/ 1647 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1648 { 1649 PetscErrorCode ierr; 1650 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1651 1652 PetscFunctionBegin; 1653 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1654 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1655 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1656 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1657 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1658 ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr); 1659 if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1660 ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr); 1661 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1662 PetscFunctionReturn(0); 1663 } 1664 1665 /*@C 1666 PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value 1667 1668 Collective 1669 1670 Input Arguments: 1671 + sf - star forest 1672 . unit - data type 1673 . 
leafdata - leaf values to use in reduction 1674 - op - operation to use for reduction 1675 1676 Output Arguments: 1677 + rootdata - root values to be updated, input state is seen by first process to perform an update 1678 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1679 1680 Level: advanced 1681 1682 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph() 1683 @*/ 1684 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1685 { 1686 PetscErrorCode ierr; 1687 1688 PetscFunctionBegin; 1689 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1690 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1691 ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr); 1692 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1693 PetscFunctionReturn(0); 1694 } 1695 1696 /*@C 1697 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd() 1698 1699 Collective 1700 1701 Input Arguments: 1702 . sf - star forest 1703 1704 Output Arguments: 1705 . degree - degree of each root vertex 1706 1707 Level: advanced 1708 1709 Notes: 1710 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 

.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  /* Degrees are computed lazily and cached on the SF; only start the reduction the first time */
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    /* sf->degree doubles as the "a begin is in flight" flag until degreeknown is set in the End call */
    if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf-sf->minleaf+1;
    ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    /* Each leaf contributes 1; summing these at the roots yields the root degrees */
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* degreetmp is indexed relative to minleaf, so shift the base pointer: leafdata[leaf] must address degreetmp[leaf-minleaf] */
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  /* Result is not available until PetscSFComputeDegreeEnd(); hand back NULL to catch premature use */
  *degree = NULL;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    /* degreetmp is allocated by the Begin call; its absence means Begin was never called */
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    /* Same -minleaf base-pointer shift as in PetscSFComputeDegreeBegin(); see comment there */
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    /* From now on sf->degree is a valid cache and further Begin/End pairs are no-ops */
    sf->degreeknown = PETSC_TRUE;
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}


/*@C
   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
   Each multi-root is assigned index of the corresponding original root.

   Collective

   Input Arguments:
+  sf - star forest
-  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

   Output Arguments:
+  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
-  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

   Level: developer

   Notes:
   The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1791 1792 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF() 1793 @*/ 1794 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1795 { 1796 PetscSF msf; 1797 PetscInt i, j, k, nroots, nmroots; 1798 PetscErrorCode ierr; 1799 1800 PetscFunctionBegin; 1801 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1802 ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 1803 if (nroots) PetscValidIntPointer(degree,2); 1804 if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3); 1805 PetscValidPointer(multiRootsOrigNumbering,4); 1806 ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr); 1807 ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr); 1808 ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr); 1809 for (i=0,j=0,k=0; i<nroots; i++) { 1810 if (!degree[i]) continue; 1811 for (j=0; j<degree[i]; j++,k++) { 1812 (*multiRootsOrigNumbering)[k] = i; 1813 } 1814 } 1815 if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail"); 1816 if (nMultiRoots) *nMultiRoots = nmroots; 1817 PetscFunctionReturn(0); 1818 } 1819 1820 /*@C 1821 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 1822 1823 Collective 1824 1825 Input Arguments: 1826 + sf - star forest 1827 . unit - data type 1828 - leafdata - leaf data to gather to roots 1829 1830 Output Argument: 1831 . 
multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1832 1833 Level: intermediate 1834 1835 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1836 @*/ 1837 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1838 { 1839 PetscErrorCode ierr; 1840 PetscSF multi = NULL; 1841 1842 PetscFunctionBegin; 1843 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1844 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1845 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1846 ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 1847 PetscFunctionReturn(0); 1848 } 1849 1850 /*@C 1851 PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin() 1852 1853 Collective 1854 1855 Input Arguments: 1856 + sf - star forest 1857 . unit - data type 1858 - leafdata - leaf data to gather to roots 1859 1860 Output Argument: 1861 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1862 1863 Level: intermediate 1864 1865 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1866 @*/ 1867 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1868 { 1869 PetscErrorCode ierr; 1870 PetscSF multi = NULL; 1871 1872 PetscFunctionBegin; 1873 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1874 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1875 ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 1876 PetscFunctionReturn(0); 1877 } 1878 1879 /*@C 1880 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd() 1881 1882 Collective 1883 1884 Input Arguments: 1885 + sf - star forest 1886 . unit - data type 1887 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1888 1889 Output Argument: 1890 . 
leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* A scatter is a broadcast over the multi-SF: each leaf receives the unit from its own multi-root slot */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Complete the broadcast over the multi-SF started in PetscSFScatterBegin() */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Verify (in debug builds only) that no leaf index appears twice in the SF's graph,
   i.e. the SF is a true star forest. Uses a hash set to count distinct leaves. */
static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
{
  PetscInt        i, n, nleaves;
  const PetscInt *ilocal = NULL;
  PetscHSetI      seen;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) {
    ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
    ierr = PetscHSetICreate(&seen);CHKERRQ(ierr);
    for (i = 0; i < nleaves; i++) {
      /* ilocal == NULL means leaves are stored contiguously, so the leaf index is just i */
      const PetscInt leaf = ilocal ? ilocal[i] : i;
      ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
    }
    /* A duplicate leaf makes the set smaller than the leaf count */
    ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
    if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
    ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@
  PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots.

  sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
  nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
  Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  /* Composition is only well defined for true star forests (unique leaves); checked in debug builds */
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  if (localPointsA) {
    /* Scatter sfA's remote roots into an array indexed by sfB's root space, so that
       sfB root r holds the sfA root reachable through sfA leaf r; rank==-1 marks "no path" */
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      /* sfA leaves beyond sfB's root space cannot connect to anything in sfB */
      if (localPointsA[i] >= numRootsB) continue;
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  /* Push each sfB root's (rank,index) of its sfA ancestor down to sfB's leaves.
     leafdataB is indexed relative to minleaf, hence the -minleaf base-pointer shift. */
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* denseB: all of sfB's leaves are connected and contiguously numbered, so leafdataB can be used as-is */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    /* Ownership of leafdataB transfers to the new SF via PETSC_OWN_POINTER below; do not free it here */
    remotePointsBA = leafdataB;
  } else {
    /* Compact: keep only leaves that have a path back to an sfA root */
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF.

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
  second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

  sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
  roots are not in sfBA.
Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
  a Reduce on sfB, on connected roots.

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op            op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool         iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPIU_REPLACE. In that case, MPI_MAXLOC and MPIU_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr);
  if (iswin) op = MPIU_REPLACE;
#endif

  /* Build a leaf-space array (indexed relative to sfB's minleaf) that holds, for each sfB leaf that is
     also an sfA leaf, the (rank,index) of its sfA root; rank==-1 marks "no sfA ancestor" */
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      /* sfA leaves outside sfB's leaf range cannot correspond to any sfB leaf */
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* Contiguous sfA leaves: leaf index is just i */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Pull each sfB leaf's sfA-root info up to its sfB root; the -minleaf base-pointer shift makes
     leafdata[leaf] address reorderedRemotePointsA[leaf-minleaf] */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* Compact in place: keep only sfB roots that received an sfA ancestor */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
. out - The local PetscSF
 */
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm           comm;
  PetscMPIInt        myrank;
  const PetscInt     *ilocal;
  const PetscSFNode  *iremote;
  PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode        *liremote;
  PetscSF            lsf;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the implementation build its own local SF if it knows how */
  if (sf->ops->CreateLocalSF) {
    ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
  } else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&myrank);CHKERRMPI(ierr);

    /* Find out local edges and build a local SF */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
    /* First pass: count edges whose remote root lives on this rank */
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);

    /* Second pass: copy the intra-process edges, remapping ranks to 0 on PETSC_COMM_SELF */
    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(lsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
    *out = lsf;
  }
  PetscFunctionReturn(0);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* NOTE(review): both begin and end of this routine log under PETSCSF_BcastAndOpBegin — presumably
     intentional since the whole operation happens here, but worth confirming */
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  /* This is an optional implementation hook; there is no generic fallback */
  if (sf->ops->BcastToZero) {
    ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}