1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/hashseti.h> 3 #include <petscctable.h> 4 5 #if defined(PETSC_HAVE_CUDA) 6 #include <cuda_runtime.h> 7 #endif 8 9 #if defined(PETSC_HAVE_HIP) 10 #include <hip/hip_runtime.h> 11 #endif 12 13 #if defined(PETSC_USE_DEBUG) 14 # define PetscSFCheckGraphSet(sf,arg) do { \ 15 if (PetscUnlikely(!(sf)->graphset)) \ 16 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \ 17 } while (0) 18 #else 19 # define PetscSFCheckGraphSet(sf,arg) do {} while (0) 20 #endif 21 22 const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL}; 23 24 PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *type) 25 { 26 PetscFunctionBegin; 27 PetscValidPointer(type,2); 28 *type = PETSC_MEMTYPE_HOST; 29 #if defined(PETSC_HAVE_CUDA) 30 if (PetscCUDAInitialized && data) { 31 cudaError_t cerr; 32 struct cudaPointerAttributes attr; 33 enum cudaMemoryType mtype; 34 cerr = cudaPointerGetAttributes(&attr,data); /* Do not check error since before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */ 35 cudaGetLastError(); /* Reset the last error */ 36 #if (CUDART_VERSION < 10000) 37 mtype = attr.memoryType; 38 #else 39 mtype = attr.type; 40 #endif 41 if (cerr == cudaSuccess && mtype == cudaMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE; 42 } 43 #endif 44 45 #if defined(PETSC_HAVE_HIP) 46 if (PetscHIPInitialized && data) { 47 hipError_t cerr; 48 struct hipPointerAttribute_t attr; 49 enum hipMemoryType mtype; 50 cerr = hipPointerGetAttributes(&attr,data); 51 hipGetLastError(); /* Reset the last error */ 52 mtype = attr.memoryType; 53 if (cerr == hipSuccess && mtype == hipMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE; 54 } 55 #endif 56 PetscFunctionReturn(0); 57 } 58 59 /*@ 60 PetscSFCreate - 
create a star forest communication context 61 62 Collective 63 64 Input Arguments: 65 . comm - communicator on which the star forest will operate 66 67 Output Arguments: 68 . sf - new star forest context 69 70 Options Database Keys: 71 + -sf_type basic -Use MPI persistent Isend/Irecv for communication (Default) 72 . -sf_type window -Use MPI-3 one-sided window for communication 73 - -sf_type neighbor -Use MPI-3 neighborhood collectives for communication 74 75 Level: intermediate 76 77 Notes: 78 When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv, 79 MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special 80 SFs are optimized and they have better performance than general SFs. 81 82 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy() 83 @*/ 84 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf) 85 { 86 PetscErrorCode ierr; 87 PetscSF b; 88 89 PetscFunctionBegin; 90 PetscValidPointer(sf,2); 91 ierr = PetscSFInitializePackage();CHKERRQ(ierr); 92 93 ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr); 94 95 b->nroots = -1; 96 b->nleaves = -1; 97 b->minleaf = PETSC_MAX_INT; 98 b->maxleaf = PETSC_MIN_INT; 99 b->nranks = -1; 100 b->rankorder = PETSC_TRUE; 101 b->ingroup = MPI_GROUP_NULL; 102 b->outgroup = MPI_GROUP_NULL; 103 b->graphset = PETSC_FALSE; 104 #if defined(PETSC_HAVE_DEVICE) 105 b->use_gpu_aware_mpi = use_gpu_aware_mpi; 106 b->use_stream_aware_mpi = PETSC_FALSE; 107 b->use_default_stream = PETSC_TRUE; /* The assumption is true for PETSc internal use of SF */ 108 /* Set the default */ 109 #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 110 b->backend = PETSCSF_BACKEND_KOKKOS; 111 #elif defined(PETSC_HAVE_CUDA) 112 b->backend = PETSCSF_BACKEND_CUDA; 113 #elif defined(PETSC_HAVE_HIP) 114 b->backend = PETSCSF_BACKEND_HIP; 115 #endif 116 #endif 117 
*sf = b; 118 PetscFunctionReturn(0); 119 } 120 121 /*@ 122 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 123 124 Collective 125 126 Input Arguments: 127 . sf - star forest 128 129 Level: advanced 130 131 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy() 132 @*/ 133 PetscErrorCode PetscSFReset(PetscSF sf) 134 { 135 PetscErrorCode ierr; 136 137 PetscFunctionBegin; 138 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 139 if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);} 140 sf->nroots = -1; 141 sf->nleaves = -1; 142 sf->minleaf = PETSC_MAX_INT; 143 sf->maxleaf = PETSC_MIN_INT; 144 sf->mine = NULL; 145 sf->remote = NULL; 146 sf->graphset = PETSC_FALSE; 147 ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr); 148 ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr); 149 sf->nranks = -1; 150 ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr); 151 #if defined(PETSC_HAVE_DEVICE) 152 for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);CHKERRQ(ierr);} 153 #endif 154 sf->degreeknown = PETSC_FALSE; 155 ierr = PetscFree(sf->degree);CHKERRQ(ierr); 156 if (sf->ingroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);} 157 if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);} 158 ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr); 159 ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr); 160 sf->setupcalled = PETSC_FALSE; 161 PetscFunctionReturn(0); 162 } 163 164 /*@C 165 PetscSFSetType - Set the PetscSF communication implementation 166 167 Collective on PetscSF 168 169 Input Parameters: 170 + sf - the PetscSF context 171 - type - a known method 172 173 Options Database Key: 174 . 
-sf_type <type> - Sets the method; use -help for a list 175 of available methods (for instance, window, basic, neighbor) 176 177 Notes: 178 See "include/petscsf.h" for available methods (for instance) 179 + PETSCSFWINDOW - MPI-2/3 one-sided 180 - PETSCSFBASIC - basic implementation using MPI-1 two-sided 181 182 Level: intermediate 183 184 .seealso: PetscSFType, PetscSFCreate() 185 @*/ 186 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type) 187 { 188 PetscErrorCode ierr,(*r)(PetscSF); 189 PetscBool match; 190 191 PetscFunctionBegin; 192 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 193 PetscValidCharPointer(type,2); 194 195 ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr); 196 if (match) PetscFunctionReturn(0); 197 198 ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr); 199 if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type); 200 /* Destroy the previous PetscSF implementation context */ 201 if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);} 202 ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr); 203 ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr); 204 ierr = (*r)(sf);CHKERRQ(ierr); 205 PetscFunctionReturn(0); 206 } 207 208 /*@C 209 PetscSFGetType - Get the PetscSF communication implementation 210 211 Not Collective 212 213 Input Parameter: 214 . sf - the PetscSF context 215 216 Output Parameter: 217 . type - the PetscSF type name 218 219 Level: intermediate 220 221 .seealso: PetscSFSetType(), PetscSFCreate() 222 @*/ 223 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 224 { 225 PetscFunctionBegin; 226 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1); 227 PetscValidPointer(type,2); 228 *type = ((PetscObject)sf)->type_name; 229 PetscFunctionReturn(0); 230 } 231 232 /*@ 233 PetscSFDestroy - destroy star forest 234 235 Collective 236 237 Input Arguments: 238 . 
sf - address of star forest 239 240 Level: intermediate 241 242 .seealso: PetscSFCreate(), PetscSFReset() 243 @*/ 244 PetscErrorCode PetscSFDestroy(PetscSF *sf) 245 { 246 PetscErrorCode ierr; 247 248 PetscFunctionBegin; 249 if (!*sf) PetscFunctionReturn(0); 250 PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1); 251 if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);} 252 ierr = PetscSFReset(*sf);CHKERRQ(ierr); 253 if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);} 254 ierr = PetscSFDestroy(&(*sf)->vscat.lsf);CHKERRQ(ierr); 255 if ((*sf)->vscat.bs > 1) {ierr = MPI_Type_free(&(*sf)->vscat.unit);CHKERRQ(ierr);} 256 ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr); 257 PetscFunctionReturn(0); 258 } 259 260 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 261 { 262 PetscInt i, nleaves; 263 PetscMPIInt size; 264 const PetscInt *ilocal; 265 const PetscSFNode *iremote; 266 PetscErrorCode ierr; 267 268 PetscFunctionBegin; 269 if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0); 270 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 271 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr); 272 for (i = 0; i < nleaves; i++) { 273 const PetscInt rank = iremote[i].rank; 274 const PetscInt remote = iremote[i].index; 275 const PetscInt leaf = ilocal ? 
ilocal[i] : i; 276 if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size); 277 if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i); 278 if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i); 279 } 280 PetscFunctionReturn(0); 281 } 282 283 284 285 286 /*@ 287 PetscSFSetUp - set up communication structures 288 289 Collective 290 291 Input Arguments: 292 . sf - star forest communication object 293 294 Level: beginner 295 296 .seealso: PetscSFSetFromOptions(), PetscSFSetType() 297 @*/ 298 PetscErrorCode PetscSFSetUp(PetscSF sf) 299 { 300 PetscErrorCode ierr; 301 302 PetscFunctionBegin; 303 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 304 PetscSFCheckGraphSet(sf,1); 305 if (sf->setupcalled) PetscFunctionReturn(0); 306 ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 307 ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr); 308 if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} /* Zero all sf->ops */ 309 if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);} 310 #if defined(PETSC_HAVE_CUDA) 311 if (sf->backend == PETSCSF_BACKEND_CUDA) { 312 sf->ops->Malloc = PetscSFMalloc_Cuda; 313 sf->ops->Free = PetscSFFree_Cuda; 314 } 315 #endif 316 #if defined(PETSC_HAVE_HIP) 317 if (sf->backend == PETSCSF_BACKEND_HIP) { 318 sf->ops->Malloc = PetscSFMalloc_HIP; 319 sf->ops->Free = PetscSFFree_HIP; 320 } 321 #endif 322 323 # 324 #if defined(PETSC_HAVE_KOKKOS) 325 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 326 sf->ops->Malloc = PetscSFMalloc_Kokkos; 327 sf->ops->Free = PetscSFFree_Kokkos; 328 } 329 #endif 330 ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 331 sf->setupcalled = PETSC_TRUE; 332 PetscFunctionReturn(0); 333 } 334 
335 /*@ 336 PetscSFSetFromOptions - set PetscSF options using the options database 337 338 Logically Collective 339 340 Input Arguments: 341 . sf - star forest 342 343 Options Database Keys: 344 + -sf_type - implementation type, see PetscSFSetType() 345 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 346 . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also 347 use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true). 348 If true, this option only works with -use_gpu_aware_mpi 1. 349 . -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false). 350 If true, this option only works with -use_gpu_aware_mpi 1. 351 352 - -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos. 353 On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices, 354 the only available is kokkos. 355 356 Level: intermediate 357 @*/ 358 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 359 { 360 PetscSFType deft; 361 char type[256]; 362 PetscErrorCode ierr; 363 PetscBool flg; 364 365 PetscFunctionBegin; 366 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 367 ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr); 368 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 369 ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr); 370 ierr = PetscSFSetType(sf,flg ? 
type : deft);CHKERRQ(ierr); 371 ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr); 372 #if defined(PETSC_HAVE_DEVICE) 373 { 374 char backendstr[32] = {0}; 375 PetscBool isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set; 376 /* Change the defaults set in PetscSFCreate() with command line options */ 377 ierr = PetscOptionsBool("-sf_use_default_stream","Assume SF's input and output root/leafdata is computed on the default stream","PetscSFSetFromOptions",sf->use_default_stream,&sf->use_default_stream,NULL);CHKERRQ(ierr); 378 ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr); 379 ierr = PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);CHKERRQ(ierr); 380 ierr = PetscStrcasecmp("cuda",backendstr,&isCuda);CHKERRQ(ierr); 381 ierr = PetscStrcasecmp("kokkos",backendstr,&isKokkos);CHKERRQ(ierr); 382 ierr = PetscStrcasecmp("hip",backendstr,&isHip);CHKERRQ(ierr); 383 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 384 if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 385 else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 386 else if (isHip) sf->backend = PETSCSF_BACKEND_HIP; 387 else if (set) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr); 388 #elif defined(PETSC_HAVE_KOKKOS) 389 if (set && !isKokkos) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. 
You can only choose kokkos", backendstr); 390 #endif 391 } 392 #endif 393 if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);} 394 ierr = PetscOptionsEnd();CHKERRQ(ierr); 395 PetscFunctionReturn(0); 396 } 397 398 /*@ 399 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 400 401 Logically Collective 402 403 Input Arguments: 404 + sf - star forest 405 - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic) 406 407 Level: advanced 408 409 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin() 410 @*/ 411 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg) 412 { 413 PetscFunctionBegin; 414 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 415 PetscValidLogicalCollectiveBool(sf,flg,2); 416 if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 417 sf->rankorder = flg; 418 PetscFunctionReturn(0); 419 } 420 421 /*@ 422 PetscSFSetGraph - Set a parallel star forest 423 424 Collective 425 426 Input Arguments: 427 + sf - star forest 428 . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 429 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 430 . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced 431 during setup in debug mode) 432 . localmode - copy mode for ilocal 433 . 
iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 434 during setup in debug mode) 435 - remotemode - copy mode for iremote 436 437 Level: intermediate 438 439 Notes: 440 In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode 441 442 Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they 443 encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not 444 needed 445 446 Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf 447 indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode). 448 449 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph() 450 @*/ 451 PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode) 452 { 453 PetscErrorCode ierr; 454 455 PetscFunctionBegin; 456 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 457 if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4); 458 if (nleaves > 0) PetscValidPointer(iremote,6); 459 if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots); 460 if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves); 461 462 if (sf->nroots >= 0) { /* Reset only if graph already set */ 463 ierr = PetscSFReset(sf);CHKERRQ(ierr); 464 } 465 466 ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 467 468 sf->nroots = nroots; 469 sf->nleaves = nleaves; 470 471 if (nleaves && ilocal) { 472 PetscInt i; 473 PetscInt minleaf = PETSC_MAX_INT; 474 PetscInt maxleaf = PETSC_MIN_INT; 475 int contiguous = 1; 476 for (i=0; i<nleaves; i++) { 477 minleaf = PetscMin(minleaf,ilocal[i]); 478 maxleaf = 
PetscMax(maxleaf,ilocal[i]); 479 contiguous &= (ilocal[i] == i); 480 } 481 sf->minleaf = minleaf; 482 sf->maxleaf = maxleaf; 483 if (contiguous) { 484 if (localmode == PETSC_OWN_POINTER) { 485 ierr = PetscFree(ilocal);CHKERRQ(ierr); 486 } 487 ilocal = NULL; 488 } 489 } else { 490 sf->minleaf = 0; 491 sf->maxleaf = nleaves - 1; 492 } 493 494 if (ilocal) { 495 switch (localmode) { 496 case PETSC_COPY_VALUES: 497 ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr); 498 ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr); 499 sf->mine = sf->mine_alloc; 500 break; 501 case PETSC_OWN_POINTER: 502 sf->mine_alloc = (PetscInt*)ilocal; 503 sf->mine = sf->mine_alloc; 504 break; 505 case PETSC_USE_POINTER: 506 sf->mine_alloc = NULL; 507 sf->mine = (PetscInt*)ilocal; 508 break; 509 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode"); 510 } 511 } 512 513 switch (remotemode) { 514 case PETSC_COPY_VALUES: 515 ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr); 516 ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr); 517 sf->remote = sf->remote_alloc; 518 break; 519 case PETSC_OWN_POINTER: 520 sf->remote_alloc = (PetscSFNode*)iremote; 521 sf->remote = sf->remote_alloc; 522 break; 523 case PETSC_USE_POINTER: 524 sf->remote_alloc = NULL; 525 sf->remote = (PetscSFNode*)iremote; 526 break; 527 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode"); 528 } 529 530 ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 531 sf->graphset = PETSC_TRUE; 532 PetscFunctionReturn(0); 533 } 534 535 /*@ 536 PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern 537 538 Collective 539 540 Input Parameters: 541 + sf - The PetscSF 542 . 
map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL) 543 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL 544 545 Notes: 546 It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map. 547 n and N are local and global sizes of x respectively. 548 549 With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to 550 sequential vectors y on all ranks. 551 552 With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a 553 sequential vector y on rank 0. 554 555 In above cases, entries of x are roots and entries of y are leaves. 556 557 With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine 558 creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 559 of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does 560 not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data 561 items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines. 562 563 In this case, roots and leaves are symmetric. 
564 565 Level: intermediate 566 @*/ 567 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern) 568 { 569 MPI_Comm comm; 570 PetscInt n,N,res[2]; 571 PetscMPIInt rank,size; 572 PetscSFType type; 573 PetscErrorCode ierr; 574 575 PetscFunctionBegin; 576 ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr); 577 if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern); 578 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 579 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 580 581 if (pattern == PETSCSF_PATTERN_ALLTOALL) { 582 type = PETSCSFALLTOALL; 583 ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr); 584 ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr); 585 ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr); 586 ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr); 587 } else { 588 ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr); 589 ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr); 590 res[0] = n; 591 res[1] = -n; 592 /* Check if n are same over all ranks so that we can optimize it */ 593 ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 594 if (res[0] == -res[1]) { /* same n */ 595 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 596 } else { 597 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 598 } 599 ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr); 600 } 601 ierr = PetscSFSetType(sf,type);CHKERRQ(ierr); 602 603 sf->pattern = pattern; 604 sf->mine = NULL; /* Contiguous */ 605 606 /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 607 Also set other easy stuff. 
608 */ 609 if (pattern == PETSCSF_PATTERN_ALLGATHER) { 610 sf->nleaves = N; 611 sf->nroots = n; 612 sf->nranks = size; 613 sf->minleaf = 0; 614 sf->maxleaf = N - 1; 615 } else if (pattern == PETSCSF_PATTERN_GATHER) { 616 sf->nleaves = rank ? 0 : N; 617 sf->nroots = n; 618 sf->nranks = rank ? 0 : size; 619 sf->minleaf = 0; 620 sf->maxleaf = rank ? -1 : N - 1; 621 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 622 sf->nleaves = size; 623 sf->nroots = size; 624 sf->nranks = size; 625 sf->minleaf = 0; 626 sf->maxleaf = size - 1; 627 } 628 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 629 sf->graphset = PETSC_TRUE; 630 PetscFunctionReturn(0); 631 } 632 633 /*@ 634 PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map 635 636 Collective 637 638 Input Arguments: 639 640 . sf - star forest to invert 641 642 Output Arguments: 643 . isf - inverse of sf 644 Level: advanced 645 646 Notes: 647 All roots must have degree 1. 648 649 The local space may be a permutation, but cannot be sparse. 
650 651 .seealso: PetscSFSetGraph() 652 @*/ 653 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf) 654 { 655 PetscErrorCode ierr; 656 PetscMPIInt rank; 657 PetscInt i,nroots,nleaves,maxlocal,count,*newilocal; 658 const PetscInt *ilocal; 659 PetscSFNode *roots,*leaves; 660 661 PetscFunctionBegin; 662 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 663 PetscSFCheckGraphSet(sf,1); 664 PetscValidPointer(isf,2); 665 666 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 667 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 668 669 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 670 ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr); 671 for (i=0; i<maxlocal; i++) { 672 leaves[i].rank = rank; 673 leaves[i].index = i; 674 } 675 for (i=0; i <nroots; i++) { 676 roots[i].rank = -1; 677 roots[i].index = -1; 678 } 679 ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr); 680 ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr); 681 682 /* Check whether our leaves are sparse */ 683 for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++; 684 if (count == nroots) newilocal = NULL; 685 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). 
*/ 686 ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr); 687 for (i=0,count=0; i<nroots; i++) { 688 if (roots[i].rank >= 0) { 689 newilocal[count] = i; 690 roots[count].rank = roots[i].rank; 691 roots[count].index = roots[i].index; 692 count++; 693 } 694 } 695 } 696 697 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr); 698 ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr); 699 ierr = PetscFree2(roots,leaves);CHKERRQ(ierr); 700 PetscFunctionReturn(0); 701 } 702 703 /*@ 704 PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph 705 706 Collective 707 708 Input Arguments: 709 + sf - communication object to duplicate 710 - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption) 711 712 Output Arguments: 713 . newsf - new communication object 714 715 Level: beginner 716 717 .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph() 718 @*/ 719 PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf) 720 { 721 PetscSFType type; 722 PetscErrorCode ierr; 723 MPI_Datatype dtype=MPIU_SCALAR; 724 725 PetscFunctionBegin; 726 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 727 PetscValidLogicalCollectiveEnum(sf,opt,2); 728 PetscValidPointer(newsf,3); 729 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr); 730 ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 731 if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);} 732 if (opt == PETSCSF_DUPLICATE_GRAPH) { 733 PetscSFCheckGraphSet(sf,1); 734 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 735 PetscInt nroots,nleaves; 736 const PetscInt *ilocal; 737 const PetscSFNode *iremote; 738 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 739 ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr); 740 } else { 741 ierr = 
PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr); 742 } 743 } 744 /* Since oldtype is committed, so is newtype, according to MPI */ 745 if (sf->vscat.bs > 1) {ierr = MPI_Type_dup(sf->vscat.unit,&dtype);CHKERRQ(ierr);} 746 (*newsf)->vscat.bs = sf->vscat.bs; 747 (*newsf)->vscat.unit = dtype; 748 (*newsf)->vscat.to_n = sf->vscat.to_n; 749 (*newsf)->vscat.from_n = sf->vscat.from_n; 750 /* Do not copy lsf. Build it on demand since it is rarely used */ 751 752 #if defined(PETSC_HAVE_DEVICE) 753 (*newsf)->backend = sf->backend; 754 (*newsf)->use_default_stream = sf->use_default_stream; 755 (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi; 756 (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi; 757 #endif 758 if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);} 759 /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */ 760 PetscFunctionReturn(0); 761 } 762 763 /*@C 764 PetscSFGetGraph - Get the graph specifying a parallel star forest 765 766 Not Collective 767 768 Input Arguments: 769 . sf - star forest 770 771 Output Arguments: 772 + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 773 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 774 . ilocal - locations of leaves in leafdata buffers 775 - iremote - remote locations of root vertices for each leaf on the current process 776 777 Notes: 778 We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet 779 780 When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you 781 want to update the graph, you must call PetscSFSetGraph after modifying the iremote array. 
782 783 Level: intermediate 784 785 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph() 786 @*/ 787 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 788 { 789 PetscErrorCode ierr; 790 791 PetscFunctionBegin; 792 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 793 if (sf->ops->GetGraph) { 794 ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr); 795 } else { 796 if (nroots) *nroots = sf->nroots; 797 if (nleaves) *nleaves = sf->nleaves; 798 if (ilocal) *ilocal = sf->mine; 799 if (iremote) *iremote = sf->remote; 800 } 801 PetscFunctionReturn(0); 802 } 803 804 /*@ 805 PetscSFGetLeafRange - Get the active leaf ranges 806 807 Not Collective 808 809 Input Arguments: 810 . sf - star forest 811 812 Output Arguments: 813 + minleaf - minimum active leaf on this process. Return 0 if there are no leaves. 814 - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves. 815 816 Level: developer 817 818 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph() 819 @*/ 820 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf) 821 { 822 PetscFunctionBegin; 823 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 824 PetscSFCheckGraphSet(sf,1); 825 if (minleaf) *minleaf = sf->minleaf; 826 if (maxleaf) *maxleaf = sf->maxleaf; 827 PetscFunctionReturn(0); 828 } 829 830 /*@C 831 PetscSFViewFromOptions - View from Options 832 833 Collective on PetscSF 834 835 Input Parameters: 836 + A - the star forest 837 . 
obj - Optional object
-  name - command line option

   Level: intermediate
.seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
  /* Thin convenience wrapper: defer entirely to the generic object viewer. */
  ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFView - view a star forest

   Collective

   Input Arguments:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Default to stdout on the SF's communicator when no viewer is given */
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Set up first so rank/offset data printed below is populated */
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    /* Only general (explicit-graph) SFs have per-leaf data worth printing */
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      /* Synchronized printing so each rank's output appears in rank order */
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        /* NULL sf->mine means the contiguous leaf numbering 0..nleaves-1 */
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        /* Sort a copy of the rank list (with a permutation) so edges print grouped by ascending rank */
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
.
roffset - offset in rmine/rremote for each rank (length nranks+1)
.  rmine - concatenated array holding local indices referencing each remote rank
-  rremote - concatenated array holding remote indices referenced for each remote rank

   Level: developer

.seealso: PetscSFGetLeafRanks()
@*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* The rank tables are built by PetscSFSetUp()/PetscSFSetUpRanks(); without them the cached arrays are invalid */
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetRootRanks) {
    ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
  } else {
    /* The generic implementation: all outputs are optional views of cached arrays */
    if (nranks)  *nranks  = sf->nranks;
    if (ranks)   *ranks   = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine)   *rmine   = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  niranks - number of leaf ranks referencing roots on this process
.  iranks - array of ranks
.  ioffset - offset in irootloc for each rank (length niranks+1)
-  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetLeafRanks) {
    ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
  } else {
    /* No generic fallback: leaf-rank (two-sided) information is only known to
       implementations that build it (e.g. sfbasic) */
    PetscSFType type;
    ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
  }
  PetscFunctionReturn(0);
}

/* Linear membership test: PETSC_TRUE iff needle occurs in list[0..n-1].
   Used only on the (assumed small) distinguished-group rank list. */
static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
  PetscInt i;
  for (i=0; i<n; i++) {
    if (needle == list[i]) return PETSC_TRUE;
  }
  return PETSC_FALSE;
}

/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

   Collective

   Input Arguments:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  /* Count, per remote rank, how many of my leaves reference it */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank (PetscTable cannot store key 0) */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--; /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    /* Translate the distinguished group's members into ranks of the SF communicator */
    MPI_Group group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  /* Two-pointer partition: i scans backward for distinguished ranks, sf->ndranks scans
     forward for non-distinguished ones; out-of-place pairs are swapped. */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank = ranks[i];
      tmpcount = rcount[i];
      ranks[i] = ranks[sf->ndranks];
      rcount[i] = rcount[sf->ndranks];
      ranks[sf->ndranks] = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition independently so lookups below can binary-search within a partition */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* Build CSR-style offsets into rmine/rremote, one segment per remote rank */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i] = 0; /* reused below as a per-rank fill cursor */
  }
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit: consecutive leaves often target the same rank, so cache the last lookup */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks: first the distinguished
         partition, then the non-distinguished one */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Argument:
.  sf - star forest

   Output Arguments:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  /* Groups are computed lazily and cached on the SF; they are freed in PetscSFDestroy() */
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks: build a helper SF with one root per
       process and one leaf per outgoing rank, then its root degree counts incomers */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
    /* Enumerate the incoming ranks: every process gathers the rank ids of those
       that reference it */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* Outgoing group is simply the ranks my leaves reference */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Argument:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Arguments:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.
1191 1192 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering() 1193 @*/ 1194 PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi) 1195 { 1196 PetscErrorCode ierr; 1197 1198 PetscFunctionBegin; 1199 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1200 PetscValidPointer(multi,2); 1201 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */ 1202 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1203 *multi = sf->multi; 1204 PetscFunctionReturn(0); 1205 } 1206 if (!sf->multi) { 1207 const PetscInt *indegree; 1208 PetscInt i,*inoffset,*outones,*outoffset,maxlocal; 1209 PetscSFNode *remote; 1210 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 1211 ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr); 1212 ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr); 1213 ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr); 1214 inoffset[0] = 0; 1215 for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i]; 1216 for (i=0; i<maxlocal; i++) outones[i] = 1; 1217 ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1218 ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1219 for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1220 if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */ 1221 for (i=0; i<sf->nroots; i++) { 1222 if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp"); 1223 } 1224 } 1225 ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr); 1226 for (i=0; i<sf->nleaves; i++) { 1227 remote[i].rank = sf->remote[i].rank; 1228 remote[i].index = outoffset[sf->mine ? 
sf->mine[i] : i]; 1229 } 1230 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1231 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1232 if (sf->rankorder) { /* Sort the ranks */ 1233 PetscMPIInt rank; 1234 PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree; 1235 PetscSFNode *newremote; 1236 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 1237 for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]); 1238 ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr); 1239 for (i=0; i<maxlocal; i++) outranks[i] = rank; 1240 ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr); 1241 ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr); 1242 /* Sort the incoming ranks at each vertex, build the inverse map */ 1243 for (i=0; i<sf->nroots; i++) { 1244 PetscInt j; 1245 for (j=0; j<indegree[i]; j++) tmpoffset[j] = j; 1246 ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr); 1247 for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1248 } 1249 ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr); 1250 ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr); 1251 ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr); 1252 for (i=0; i<sf->nleaves; i++) { 1253 newremote[i].rank = sf->remote[i].rank; 1254 newremote[i].index = newoutoffset[sf->mine ? 
sf->mine[i] : i]; 1255 } 1256 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1257 ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr); 1258 } 1259 ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr); 1260 } 1261 *multi = sf->multi; 1262 PetscFunctionReturn(0); 1263 } 1264 1265 /*@C 1266 PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices 1267 1268 Collective 1269 1270 Input Arguments: 1271 + sf - original star forest 1272 . nselected - number of selected roots on this process 1273 - selected - indices of the selected roots on this process 1274 1275 Output Arguments: 1276 . esf - new star forest 1277 1278 Level: advanced 1279 1280 Note: 1281 To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can 1282 be done by calling PetscSFGetGraph(). 1283 1284 .seealso: PetscSFSetGraph(), PetscSFGetGraph() 1285 @*/ 1286 PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf) 1287 { 1288 PetscInt i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal; 1289 const PetscInt *ilocal; 1290 signed char *rootdata,*leafdata,*leafmem; 1291 const PetscSFNode *iremote; 1292 PetscSFNode *new_iremote; 1293 MPI_Comm comm; 1294 PetscErrorCode ierr; 1295 1296 PetscFunctionBegin; 1297 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1298 PetscSFCheckGraphSet(sf,1); 1299 if (nselected) PetscValidPointer(selected,3); 1300 PetscValidPointer(esf,4); 1301 1302 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1303 ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1304 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1305 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 1306 1307 if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of 
range indices */ 1308 PetscBool dups; 1309 ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr); 1310 if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups"); 1311 for (i=0; i<nselected; i++) 1312 if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %D is out of [0,%D)",selected[i],nroots); 1313 } 1314 1315 if (sf->ops->CreateEmbeddedSF) { 1316 ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,selected,esf);CHKERRQ(ierr); 1317 } else { 1318 /* A generic version of creating embedded sf */ 1319 ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 1320 maxlocal = maxleaf - minleaf + 1; 1321 ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr); 1322 leafdata = leafmem - minleaf; 1323 /* Tag selected roots and bcast to leaves */ 1324 for (i=0; i<nselected; i++) rootdata[selected[i]] = 1; 1325 ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr); 1326 ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr); 1327 1328 /* Build esf with leaves that are still connected */ 1329 esf_nleaves = 0; 1330 for (i=0; i<nleaves; i++) { 1331 j = ilocal ? ilocal[i] : i; 1332 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1333 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1334 */ 1335 esf_nleaves += (leafdata[j] ? 1 : 0); 1336 } 1337 ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr); 1338 ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr); 1339 for (i=n=0; i<nleaves; i++) { 1340 j = ilocal ? 
ilocal[i] : i; 1341 if (leafdata[j]) { 1342 new_ilocal[n] = j; 1343 new_iremote[n].rank = iremote[i].rank; 1344 new_iremote[n].index = iremote[i].index; 1345 ++n; 1346 } 1347 } 1348 ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr); 1349 ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr); 1350 ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1351 ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr); 1352 } 1353 ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1354 PetscFunctionReturn(0); 1355 } 1356 1357 /*@C 1358 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1359 1360 Collective 1361 1362 Input Arguments: 1363 + sf - original star forest 1364 . nselected - number of selected leaves on this process 1365 - selected - indices of the selected leaves on this process 1366 1367 Output Arguments: 1368 . newsf - new star forest 1369 1370 Level: advanced 1371 1372 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph() 1373 @*/ 1374 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1375 { 1376 const PetscSFNode *iremote; 1377 PetscSFNode *new_iremote; 1378 const PetscInt *ilocal; 1379 PetscInt i,nroots,*leaves,*new_ilocal; 1380 MPI_Comm comm; 1381 PetscErrorCode ierr; 1382 1383 PetscFunctionBegin; 1384 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1385 PetscSFCheckGraphSet(sf,1); 1386 if (nselected) PetscValidPointer(selected,3); 1387 PetscValidPointer(newsf,4); 1388 1389 /* Uniq selected[] and put results in leaves[] */ 1390 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1391 ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr); 1392 ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr); 1393 ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr); 1394 if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) 
SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves); 1395 1396 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1397 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) { 1398 ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr); 1399 } else { 1400 ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr); 1401 ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr); 1402 ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr); 1403 for (i=0; i<nselected; ++i) { 1404 const PetscInt l = leaves[i]; 1405 new_ilocal[i] = ilocal ? ilocal[l] : l; 1406 new_iremote[i].rank = iremote[l].rank; 1407 new_iremote[i].index = iremote[l].index; 1408 } 1409 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr); 1410 ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1411 } 1412 ierr = PetscFree(leaves);CHKERRQ(ierr); 1413 PetscFunctionReturn(0); 1414 } 1415 1416 /*@C 1417 PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd() 1418 1419 Collective on PetscSF 1420 1421 Input Arguments: 1422 + sf - star forest on which to communicate 1423 . unit - data type associated with each node 1424 . rootdata - buffer to broadcast 1425 - op - operation to use for reduction 1426 1427 Output Arguments: 1428 . leafdata - buffer to be reduced with values from each leaf's respective root 1429 1430 Level: intermediate 1431 1432 Notes: 1433 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1434 are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1435 use PetscSFBcastAndOpWithMemTypeBegin() instead. 
.seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd(), PetscSFBcastAndOpWithMemTypeBegin()
@*/
PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Skip event logging when invoked from VecScatter, which logs its own events */
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);}
  /* Detect host vs device residency of the buffers, then dispatch to the implementation */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastAndOpWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastAndOpEnd()

   Collective on PetscSF

   Input Arguments:
+  sf - star forest on which to communicate
.  unit - data type associated with each node
.  rootmtype - memory type of rootdata
.  rootdata - buffer to broadcast
.  leafmtype - memory type of leafdata
-  op - operation to use for reduction

   Output Arguments:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd(),PetscSFBcastAndOpBegin()
@*/
PetscErrorCode PetscSFBcastAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);}
  /* Caller supplied the memory types, so the PetscGetMemType() probes are skipped */
  ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Arguments:
.
leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFReduceEnd()
@*/
PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Skip event logging when invoked from VecScatter, which logs its own events */
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);}
  ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Arguments:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

   Notes:
   When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
   are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should
   use PetscSFReduceWithMemTypeBegin() instead.

.seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
@*/
PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
  /* Detect host vs device residency of the buffers, then dispatch to the implementation */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafmtype - memory type of leafdata
.  leafdata - values to reduce
.  rootmtype - memory type of rootdata
-  op - reduction operation

   Output Arguments:
.
rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
@*/
PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Skip event logging when invoked from VecScatter, which logs its own events */
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
  /* Caller supplied the memory types, so the PetscGetMemType() probes are skipped */
  ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Arguments:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);}
  ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Arguments:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
   integers.

.seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype,leafupdatemtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Detect whether each buffer lives on host or device so the backend picks the right code path */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
  /* The type-specific implementations only receive one leaf memtype, so leafdata and leafupdate must match */
  if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
  ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Arguments:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* No PetscSFSetUp()/memtype detection here: the matching Begin call already did both */
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1708 1709 .seealso: PetscSFGatherBegin() 1710 @*/ 1711 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree) 1712 { 1713 PetscErrorCode ierr; 1714 1715 PetscFunctionBegin; 1716 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1717 PetscSFCheckGraphSet(sf,1); 1718 PetscValidPointer(degree,2); 1719 if (!sf->degreeknown) { 1720 PetscInt i, nroots = sf->nroots, maxlocal; 1721 if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1722 maxlocal = sf->maxleaf-sf->minleaf+1; 1723 ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr); 1724 ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1725 for (i=0; i<nroots; i++) sf->degree[i] = 0; 1726 for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1; 1727 ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr); 1728 } 1729 *degree = NULL; 1730 PetscFunctionReturn(0); 1731 } 1732 1733 /*@C 1734 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin() 1735 1736 Collective 1737 1738 Input Arguments: 1739 . sf - star forest 1740 1741 Output Arguments: 1742 . degree - degree of each root vertex 1743 1744 Level: developer 1745 1746 Notes: 1747 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 

.seealso: PetscSFComputeDegreeBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    /* degreetmp is allocated (with at least one entry) by PetscSFComputeDegreeBegin(); its absence means Begin was never called */
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    /* Cache the result; subsequent Begin/End pairs return sf->degree without re-communicating */
    sf->degreeknown = PETSC_TRUE;
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}


/*@C
   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
   Each multi-root is assigned index of the corresponding original root.

   Collective

   Input Arguments:
+  sf - star forest
-  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

   Output Arguments:
+  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
-  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

   Level: developer

   Notes:
   The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1788 1789 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF() 1790 @*/ 1791 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1792 { 1793 PetscSF msf; 1794 PetscInt i, j, k, nroots, nmroots; 1795 PetscErrorCode ierr; 1796 1797 PetscFunctionBegin; 1798 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1799 ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 1800 if (nroots) PetscValidIntPointer(degree,2); 1801 if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3); 1802 PetscValidPointer(multiRootsOrigNumbering,4); 1803 ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr); 1804 ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr); 1805 ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr); 1806 for (i=0,j=0,k=0; i<nroots; i++) { 1807 if (!degree[i]) continue; 1808 for (j=0; j<degree[i]; j++,k++) { 1809 (*multiRootsOrigNumbering)[k] = i; 1810 } 1811 } 1812 if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail"); 1813 if (nMultiRoots) *nMultiRoots = nmroots; 1814 PetscFunctionReturn(0); 1815 } 1816 1817 /*@C 1818 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 1819 1820 Collective 1821 1822 Input Arguments: 1823 + sf - star forest 1824 . unit - data type 1825 - leafdata - leaf data to gather to roots 1826 1827 Output Argument: 1828 . 
multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1829 1830 Level: intermediate 1831 1832 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1833 @*/ 1834 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1835 { 1836 PetscErrorCode ierr; 1837 PetscSF multi = NULL; 1838 1839 PetscFunctionBegin; 1840 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1841 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1842 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1843 ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 1844 PetscFunctionReturn(0); 1845 } 1846 1847 /*@C 1848 PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin() 1849 1850 Collective 1851 1852 Input Arguments: 1853 + sf - star forest 1854 . unit - data type 1855 - leafdata - leaf data to gather to roots 1856 1857 Output Argument: 1858 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1859 1860 Level: intermediate 1861 1862 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1863 @*/ 1864 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1865 { 1866 PetscErrorCode ierr; 1867 PetscSF multi = NULL; 1868 1869 PetscFunctionBegin; 1870 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1871 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1872 ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 1873 PetscFunctionReturn(0); 1874 } 1875 1876 /*@C 1877 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd() 1878 1879 Collective 1880 1881 Input Arguments: 1882 + sf - star forest 1883 . unit - data type 1884 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1885 1886 Output Argument: 1887 . 
leafdata - leaf data to be update with personal data from each respective root 1888 1889 Level: intermediate 1890 1891 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1892 @*/ 1893 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1894 { 1895 PetscErrorCode ierr; 1896 PetscSF multi = NULL; 1897 1898 PetscFunctionBegin; 1899 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1900 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1901 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1902 ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr); 1903 PetscFunctionReturn(0); 1904 } 1905 1906 /*@C 1907 PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin() 1908 1909 Collective 1910 1911 Input Arguments: 1912 + sf - star forest 1913 . unit - data type 1914 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1915 1916 Output Argument: 1917 . leafdata - leaf data to be update with personal data from each respective root 1918 1919 Level: intermediate 1920 1921 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1922 @*/ 1923 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1924 { 1925 PetscErrorCode ierr; 1926 PetscSF multi = NULL; 1927 1928 PetscFunctionBegin; 1929 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1930 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1931 ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr); 1932 PetscFunctionReturn(0); 1933 } 1934 1935 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 1936 { 1937 PetscInt i, n, nleaves; 1938 const PetscInt *ilocal = NULL; 1939 PetscHSetI seen; 1940 PetscErrorCode ierr; 1941 1942 PetscFunctionBegin; 1943 if (PetscDefined(USE_DEBUG)) { 1944 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 1945 ierr = PetscHSetICreate(&seen);CHKERRQ(ierr); 1946 for (i = 
0; i < nleaves; i++) { 1947 const PetscInt leaf = ilocal ? ilocal[i] : i; 1948 ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr); 1949 } 1950 ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr); 1951 if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique"); 1952 ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr); 1953 } 1954 PetscFunctionReturn(0); 1955 } 1956 1957 /*@ 1958 PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view 1959 1960 Input Parameters: 1961 + sfA - The first PetscSF 1962 - sfB - The second PetscSF 1963 1964 Output Parameters: 1965 . sfBA - The composite SF 1966 1967 Level: developer 1968 1969 Notes: 1970 Currently, the two SFs must be defined on congruent communicators and they must be true star 1971 forests, i.e. the same leaf is not connected with different roots. 1972 1973 sfA's leaf space and sfB's root space might be partially overlapped. The composition builds 1974 a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected 1975 nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a 1976 Bcast on sfA, then a Bcast on sfB, on connected nodes. 

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  if (localPointsA) {
    /* sfA has non-contiguous leaves: scatter its remote points into slots indexed by leaf id,
       marking slots with no incoming edge (or out of sfB's root range) with rank = -1 */
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* leaf of sfA has no matching root in sfB */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  /* Bcast sfA's root locations down sfB so each leaf of sfB learns which root of sfA (if any) reaches it.
     leafdataB is shifted by minleaf so it can be indexed with absolute leaf numbers. */
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* denseB: every leaf of sfB is connected, and sfB's leaves are contiguous; then the
     composite can reuse leafdataB directly as the remote-point array */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership of leafdataB passes to remotePointsBA (freed via PETSC_OWN_POINTER below) */
  } else {
    /* Compact: keep only leaves that are actually reached from sfA's roots */
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

   Input Parameters:
+  sfA - The first PetscSF
-  sfB - The second PetscSF

   Output Parameters:
.  sfBA - The composite SF.

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
   roots are not in sfBA. 
Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
   a Reduce on sfB, on connected roots.

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op            op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool         iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPIU_REPLACE. In that case, MPI_MAXLOC and MPIU_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr);
  if (iswin) op = MPIU_REPLACE;
#endif

  /* Lay out sfA's remote points by absolute leaf index of sfB, with rank = -1 marking
     leaves of sfB that receive nothing from sfA */
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; /* not a leaf of sfB */
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* ilocal == NULL: sfA's leaves are contiguous starting at 0 */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  /* Reduce sfA's root locations up sfB: each root of sfB learns which root of sfA (if any)
     reaches its (single, by the degree-1 precondition) leaf; rank stays -1 if none does */
  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* Compact in place: keep only connected roots; safe because numLeavesBA <= i always */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
. out - The local PetscSF
*/
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm          comm;
  PetscMPIInt       myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt          i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode       *liremote;
  PetscSF           lsf;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->CreateLocalSF) {
    /* Type-specific fast path (if the implementation provides one) */
    ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
  } else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr);

    /* Find out local edges and build a local SF */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);

    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(lsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
    *out = lsf;
  }
  PetscFunctionReturn(0);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Logged under the generic BcastAndOp event; there is no dedicated event for BcastToZero */
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  if (sf->ops->BcastToZero) {
    ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
