#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/hashseti.h>
#include <petscctable.h>

#if defined(PETSC_HAVE_CUDA)
#include <cuda_runtime.h>
#endif

#if defined(PETSC_HAVE_HIP)
#include <hip/hip_runtime.h>
#endif

/* In debug builds, raise an error if the SF graph has not been set yet; expands to a no-op otherwise */
#if defined(PETSC_USE_DEBUG)
#  define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                              \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif

const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};

/* Classify a pointer as host or device memory.
   Defaults to PETSC_MEMTYPE_HOST and only reports PETSC_MEMTYPE_DEVICE when the CUDA (or HIP)
   runtime is initialized and the runtime positively identifies the pointer as device memory. */
PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *type)
{
  PetscFunctionBegin;
  PetscValidPointer(type,2);
  *type = PETSC_MEMTYPE_HOST;
#if defined(PETSC_HAVE_CUDA)
  if (PetscCUDAInitialized && data) {
    cudaError_t                  cerr;
    struct cudaPointerAttributes attr;
    enum cudaMemoryType          mtype;
    cerr = cudaPointerGetAttributes(&attr,data); /* Do not check error since before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */
    cudaGetLastError(); /* Reset the last error */
#if (CUDART_VERSION < 10000)
    mtype = attr.memoryType; /* field renamed to 'type' in CUDA 10 */
#else
    mtype = attr.type;
#endif
    if (cerr == cudaSuccess && mtype == cudaMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
  }
#endif

#if defined(PETSC_HAVE_HIP)
  if (PetscHIPInitialized && data) {
    hipError_t                   cerr;
    struct hipPointerAttribute_t attr;
    enum hipMemoryType           mtype;
    cerr = hipPointerGetAttributes(&attr,data); /* error deliberately ignored, mirroring the CUDA branch above */
    hipGetLastError(); /* Reset the last error */
    mtype = attr.memoryType;
    if (cerr == hipSuccess && mtype == hipMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
  }
#endif
  PetscFunctionReturn(0);
}

/*@
  PetscSFCreate - create a star forest communication context

  Collective

  Input Parameter:
. comm - communicator on which the star forest will operate

  Output Parameter:
. sf - new star forest context

  Options Database Keys:
+ -sf_type basic    -Use MPI persistent Isend/Irecv for communication (Default)
. -sf_type window   -Use MPI-3 one-sided window for communication
- -sf_type neighbor -Use MPI-3 neighborhood collectives for communication

  Level: intermediate

  Notes:
  When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
  MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
  SFs are optimized and they have better performance than general SFs.

.seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
{
  PetscErrorCode ierr;
  PetscSF        b;

  PetscFunctionBegin;
  PetscValidPointer(sf,2);
  ierr = PetscSFInitializePackage();CHKERRQ(ierr);

  ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);

  /* Negative sizes mark the graph as not yet set; leaf range starts inverted (MAX/MIN) so the
     first real graph narrows it correctly */
  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_MAX_INT;
  b->maxleaf   = PETSC_MIN_INT;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;
#if defined(PETSC_HAVE_DEVICE)
  b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
  b->use_stream_aware_mpi = PETSC_FALSE;
  b->unknown_input_stream = PETSC_FALSE;
#if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
  b->backend = PETSCSF_BACKEND_KOKKOS;
#elif defined(PETSC_HAVE_CUDA)
  b->backend = PETSCSF_BACKEND_CUDA;
#elif defined(PETSC_HAVE_HIP)
  b->backend = PETSCSF_BACKEND_HIP;
#endif

#if defined(PETSC_HAVE_NVSHMEM)
  b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
  b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
  ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL);CHKERRQ(ierr);
#endif
#endif
  b->vscat.from_n = -1;
  b->vscat.to_n   = -1;
  b->vscat.unit   = MPIU_SCALAR;
  *sf = b;
  PetscFunctionReturn(0);
}

/*@
  PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

  Collective

  Input Parameter:
. sf - star forest

  Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the type-specific implementation release its state first */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRMPI(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRMPI(ierr);}
  /* The multi-SF back-references this SF; break the link before destroying it to avoid recursion */
  if (sf->multi) sf->multi->multi = NULL;
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);

#if defined(PETSC_HAVE_DEVICE)
  for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);CHKERRQ(ierr);}
#endif

  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@C 177 PetscSFSetType - Set the PetscSF communication implementation 178 179 Collective on PetscSF 180 181 Input Parameters: 182 + sf - the PetscSF context 183 - type - a known method 184 185 Options Database Key: 186 . -sf_type <type> - Sets the method; use -help for a list 187 of available methods (for instance, window, basic, neighbor) 188 189 Notes: 190 See "include/petscsf.h" for available methods (for instance) 191 + PETSCSFWINDOW - MPI-2/3 one-sided 192 - PETSCSFBASIC - basic implementation using MPI-1 two-sided 193 194 Level: intermediate 195 196 .seealso: PetscSFType, PetscSFCreate() 197 @*/ 198 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type) 199 { 200 PetscErrorCode ierr,(*r)(PetscSF); 201 PetscBool match; 202 203 PetscFunctionBegin; 204 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 205 PetscValidCharPointer(type,2); 206 207 ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr); 208 if (match) PetscFunctionReturn(0); 209 210 ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr); 211 if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type); 212 /* Destroy the previous PetscSF implementation context */ 213 if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);} 214 ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr); 215 ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr); 216 ierr = (*r)(sf);CHKERRQ(ierr); 217 PetscFunctionReturn(0); 218 } 219 220 /*@C 221 PetscSFGetType - Get the PetscSF communication implementation 222 223 Not Collective 224 225 Input Parameter: 226 . sf - the PetscSF context 227 228 Output Parameter: 229 . 
type - the PetscSF type name 230 231 Level: intermediate 232 233 .seealso: PetscSFSetType(), PetscSFCreate() 234 @*/ 235 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 236 { 237 PetscFunctionBegin; 238 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1); 239 PetscValidPointer(type,2); 240 *type = ((PetscObject)sf)->type_name; 241 PetscFunctionReturn(0); 242 } 243 244 /*@C 245 PetscSFDestroy - destroy star forest 246 247 Collective 248 249 Input Parameter: 250 . sf - address of star forest 251 252 Level: intermediate 253 254 .seealso: PetscSFCreate(), PetscSFReset() 255 @*/ 256 PetscErrorCode PetscSFDestroy(PetscSF *sf) 257 { 258 PetscErrorCode ierr; 259 260 PetscFunctionBegin; 261 if (!*sf) PetscFunctionReturn(0); 262 PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1); 263 if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);} 264 ierr = PetscSFReset(*sf);CHKERRQ(ierr); 265 if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);} 266 ierr = PetscSFDestroy(&(*sf)->vscat.lsf);CHKERRQ(ierr); 267 if ((*sf)->vscat.bs > 1) {ierr = MPI_Type_free(&(*sf)->vscat.unit);CHKERRMPI(ierr);} 268 ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr); 269 PetscFunctionReturn(0); 270 } 271 272 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 273 { 274 PetscInt i, nleaves; 275 PetscMPIInt size; 276 const PetscInt *ilocal; 277 const PetscSFNode *iremote; 278 PetscErrorCode ierr; 279 280 PetscFunctionBegin; 281 if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0); 282 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 283 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr); 284 for (i = 0; i < nleaves; i++) { 285 const PetscInt rank = iremote[i].rank; 286 const PetscInt remote = iremote[i].index; 287 const PetscInt leaf = ilocal ? 
ilocal[i] : i; 288 if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size); 289 if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i); 290 if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i); 291 } 292 PetscFunctionReturn(0); 293 } 294 295 /*@ 296 PetscSFSetUp - set up communication structures 297 298 Collective 299 300 Input Parameter: 301 . sf - star forest communication object 302 303 Level: beginner 304 305 .seealso: PetscSFSetFromOptions(), PetscSFSetType() 306 @*/ 307 PetscErrorCode PetscSFSetUp(PetscSF sf) 308 { 309 PetscErrorCode ierr; 310 311 PetscFunctionBegin; 312 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 313 PetscSFCheckGraphSet(sf,1); 314 if (sf->setupcalled) PetscFunctionReturn(0); 315 ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 316 ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr); 317 if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} /* Zero all sf->ops */ 318 if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);} 319 #if defined(PETSC_HAVE_CUDA) 320 if (sf->backend == PETSCSF_BACKEND_CUDA) { 321 sf->ops->Malloc = PetscSFMalloc_CUDA; 322 sf->ops->Free = PetscSFFree_CUDA; 323 } 324 #endif 325 #if defined(PETSC_HAVE_HIP) 326 if (sf->backend == PETSCSF_BACKEND_HIP) { 327 sf->ops->Malloc = PetscSFMalloc_HIP; 328 sf->ops->Free = PetscSFFree_HIP; 329 } 330 #endif 331 332 # 333 #if defined(PETSC_HAVE_KOKKOS) 334 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 335 sf->ops->Malloc = PetscSFMalloc_Kokkos; 336 sf->ops->Free = PetscSFFree_Kokkos; 337 } 338 #endif 339 ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 340 sf->setupcalled = PETSC_TRUE; 341 PetscFunctionReturn(0); 342 } 343 344 /*@ 345 
  PetscSFSetFromOptions - set PetscSF options using the options database

  Logically Collective

  Input Parameter:
. sf - star forest

  Options Database Keys:
+ -sf_type                 - implementation type, see PetscSFSetType()
. -sf_rank_order           - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
. -sf_unknown_input_stream - Assume SF root/leafdata is computed on arbitrary streams unknown to SF, so SF must
                             synchronize before processing the data (default: false). This is the option actually
                             registered below; an older name -sf_use_default_stream no longer exists in this code.
. -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
                             If true, this option only works with -use_gpu_aware_mpi 1.

- -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos.
                             On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
                             the only available is kokkos.

  Level: intermediate
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
  /* Keep the current type as the default when one has already been set */
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  {
    char      backendstr[32] = {0};
    PetscBool isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
    /* Change the defaults set in PetscSFCreate() with command line options */
    ierr = PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL);CHKERRQ(ierr);
    ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr);
    ierr = PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);CHKERRQ(ierr);
    ierr = PetscStrcasecmp("cuda",backendstr,&isCuda);CHKERRQ(ierr);
    ierr = PetscStrcasecmp("kokkos",backendstr,&isKokkos);CHKERRQ(ierr);
    ierr = PetscStrcasecmp("hip",backendstr,&isHip);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
    if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
    else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
    else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
    else if (set) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
#elif defined(PETSC_HAVE_KOKKOS)
    /* Kokkos-only builds accept only the kokkos backend */
    if (set && !isKokkos) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr);
#endif
  }
#endif
  if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

  Logically Collective

  Input Parameters:
+ sf - star forest
- flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

  Level: advanced

.seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  /* The multi-SF is built from this flag, so it must be set before the multi-SF exists */
  if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}

/*@C
  PetscSFSetGraph - Set a parallel star forest

  Collective

  Input Parameters:
+ sf - star forest
. nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
. nleaves - number of leaf vertices on the current process, each of these references a root on any process
. ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
during setup in debug mode)
. localmode - copy mode for ilocal
.
iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
during setup in debug mode)
- remotemode - copy mode for iremote

  Level: intermediate

  Notes:
  In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode

  Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
  encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
  needed

  Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
  indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).

.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
  if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    ierr = PetscSFReset(sf);CHKERRQ(ierr);
  }

  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  if (nleaves && ilocal) {
    /* Scan leaf locations once to record the leaf range and detect the identity permutation */
    PetscInt i;
    PetscInt minleaf = PETSC_MAX_INT;
    PetscInt maxleaf = PETSC_MIN_INT;
    int      contiguous = 1;
    for (i=0; i<nleaves; i++) {
      minleaf = PetscMin(minleaf,ilocal[i]);
      maxleaf = PetscMax(maxleaf,ilocal[i]);
      contiguous &= (ilocal[i] == i);
    }
    sf->minleaf = minleaf;
    sf->maxleaf = maxleaf;
    if (contiguous) {
      /* Identity ilocal encodes contiguous storage; drop it (freeing it when we were given ownership) */
      if (localmode == PETSC_OWN_POINTER) {
        ierr = PetscFree(ilocal);CHKERRQ(ierr);
      }
      ilocal = NULL;
    }
  } else {
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
  }

  if (ilocal) {
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
      sf->mine = sf->mine_alloc;
      break;
    case PETSC_OWN_POINTER:
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      sf->mine_alloc = NULL;       /* caller retains ownership; nothing to free on reset */
      sf->mine       = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }

  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    sf->remote_alloc = NULL;       /* caller retains ownership; nothing to free on reset */
    sf->remote       = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
  PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

  Collective

  Input Parameters:
+ sf - The PetscSF
.
map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
- pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

  Notes:
  It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
  n and N are local and global sizes of x respectively.

  With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
  sequential vectors y on all ranks.

  With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
  sequential vector y on rank 0.

  In above cases, entries of x are roots and entries of y are leaves.

  With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
  creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
  of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
  not mean one can not send multiple items. One just needs to create a new MPI datatype for the multiple data
  items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

  In this case, roots and leaves are symmetric.

  Level: intermediate
@*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* ALLTOALL ignores the user's map; build a layout with `size` roots per rank */
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRMPI(ierr);
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
  */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots  = n;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves in a gather */
    sf->nleaves = rank ? 0 : N;
    sf->nroots  = n;
    sf->nranks  = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves = size;
    sf->nroots  = size;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
  PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

  Collective

  Input Parameter:
. sf - star forest to invert

  Output Parameter:
. isf - inverse of sf

  Level: advanced

  Notes:
  All roots must have degree 1.

  The local space may be a permutation, but cannot be sparse.

.seealso: PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  /* Each leaf advertises its own (rank,index); reducing these to the roots yields the inverse edges */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* Roots not reached by any leaf keep (-1,-1) so they can be filtered out below */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL;
  else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves).
 */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  /* The inverse swaps roles: old leaf space (size maxlocal) becomes the root space */
  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

  Collective

  Input Parameters:
+ sf - communication object to duplicate
- opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

  Output Parameter:
. newsf - new communication object

  Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;
  MPI_Datatype   dtype=MPIU_SCALAR;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
  /* Since oldtype is committed, so is newtype, according to MPI */
  if (sf->vscat.bs > 1) {ierr = MPI_Type_dup(sf->vscat.unit,&dtype);CHKERRMPI(ierr);}
  (*newsf)->vscat.bs     = sf->vscat.bs;
  (*newsf)->vscat.unit   = dtype;
  (*newsf)->vscat.to_n   = sf->vscat.to_n;
  (*newsf)->vscat.from_n = sf->vscat.from_n;
  /* Do not copy lsf. Build it on demand since it is rarely used */

#if defined(PETSC_HAVE_DEVICE)
  (*newsf)->backend              = sf->backend;
  (*newsf)->unknown_input_stream = sf->unknown_input_stream;
  (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(0);
}

/*@C
  PetscSFGetGraph - Get the graph specifying a parallel star forest

  Not Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
. nleaves - number of leaf vertices on the current process, each of these references a root on any process
. ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
- iremote - remote locations of root vertices for each leaf on the current process

  Notes:
  We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet

  Fortran Notes:
  The returned iremote array is a copy and must be deallocated after use. Consequently, if you
  want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.

  To check for a NULL ilocal use
$ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then

  Level: intermediate

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Patterned SFs materialize their graph on demand via the type-specific GetGraph */
  if (sf->ops->GetGraph) {
    ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
  } else {
    if (nroots) *nroots = sf->nroots;
    if (nleaves) *nleaves = sf->nleaves;
    if (ilocal) *ilocal = sf->mine;
    if (iremote) *iremote = sf->remote;
  }
  PetscFunctionReturn(0);
}

/*@
  PetscSFGetLeafRange - Get the active leaf ranges

  Not Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
- maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.

  Level: developer

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}

/*@C
  PetscSFViewFromOptions - View from Options

  Collective on PetscSF

  Input Parameters:
+ A - the star forest
.
obj - Optional object
-  name - command line option

   Level: intermediate
.seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
  /* Thin wrapper: all option processing is done by the generic PetscObject routine */
  ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFView - view a star forest

   Collective

   Input Parameters:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      /* One line per leaf edge: local leaf index <- (remote rank, remote root index) */
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        /* Sort a copy of the ranks (with a permutation) so output is ordered by rank,
           without disturbing the sf's own rank ordering */
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  /* Give the type-specific implementation a chance to print additional state */
  if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
.
roffset - offset in rmine/rremote for each rank (length nranks+1)
.  rmine - concatenated array holding local indices referencing each remote rank
-  rremote - concatenated array holding remote indices referenced for each remote rank

   Level: developer

.seealso: PetscSFGetLeafRanks()
@*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* These arrays are built by PetscSFSetUpRanks(), which runs during PetscSFSetUp() */
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetRootRanks) {
    ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
  } else {
    /* The generic implementation: return the arrays cached on the base object; all outputs are optional */
    if (nranks) *nranks = sf->nranks;
    if (ranks) *ranks = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine) *rmine = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

   Not Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  niranks - number of leaf ranks referencing roots on this process
.  iranks - array of ranks
.
ioffset - offset in irootloc for each rank (length niranks+1)
-  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetLeafRanks) {
    ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
  } else {
    /* No generic fallback: leaf-rank (two-sided) information is only kept by some SF types */
    PetscSFType type;
    ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
  }
  PetscFunctionReturn(0);
}

/* Return PETSC_TRUE iff needle occurs in list[0..n-1]; linear scan, intended for short lists */
static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
  PetscInt i;
  for (i=0; i<n; i++) {
    if (needle == list[i]) return PETSC_TRUE;
  }
  return PETSC_FALSE;
}

/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.
   Collective

   Input Parameters:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr);
  /* Count leaves per remote rank using a hash table keyed on rank */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank (PetscTable cannot store key 0) */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--; /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group   group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate every member of dgroup into the sf communicator's rank numbering */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRMPI(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRMPI(ierr);}
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank = ranks[i];
      tmpcount = rcount[i];
      ranks[i] = ranks[sf->ndranks];
      rcount[i] = rcount[sf->ndranks];
      ranks[sf->ndranks] = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition separately so distinguished ranks stay first */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* Build CSR-style offsets into rmine/rremote; reuse rcount[] as per-rank fill cursors */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i] = 0;
  }
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit: reuse the last lookup while consecutive leaves go to the same rank */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks (distinguished part first, then the rest) */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks: build an auxiliary SF with one root and one
       leaf per referenced rank, so the root degree equals the number of incoming ranks */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
    /* Enumerate the incoming ranks: gather each sender's rank onto the root */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRMPI(ierr);
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* Outgoing ranks are already known locally: they are exactly sf->ranks */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRMPI(ierr);
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Parameter:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Parameter:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.
.seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    *multi = sf->multi;
    sf->multi->multi = sf->multi; /* the multi-SF is its own multi-SF */
    PetscFunctionReturn(0);
  }
  if (!sf->multi) { /* Lazily build and cache the multi-SF on first request */
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    /* inoffset[] is the prefix sum of root degrees: the start of each root's slot range in the multi root space */
    ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Fetch-and-add of 1 hands every leaf a distinct slot index at its root */
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
      for (i=0; i<sf->nroots; i++) {
        if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
      }
    }
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    sf->multi->multi = sf->multi;
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) { /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      /* Reduce each leaf's owning rank into the multi root space so roots learn who sends to each slot */
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell each leaf its rank-ordered slot index */
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
    }
    ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}

/*@C
  PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices

  Collective

  Input Parameters:
+ sf - original star forest
. nselected  - number of selected roots on this process
- selected   - indices of the selected roots on this process

  Output Parameter:
. esf - new star forest

  Level: advanced

  Note:
  To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
  be done by calling PetscSFGetGraph().

.seealso: PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
{
  PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
  const PetscInt    *ilocal;
  signed char       *rootdata,*leafdata,*leafmem;
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  MPI_Comm          comm;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (nselected) PetscValidPointer(selected,3);
  PetscValidPointer(esf,4);

  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);

  if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
    PetscBool dups;
    ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr);
    if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
    for (i=0; i<nselected; i++)
      if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %D is out of [0,%D)",selected[i],nroots);
  }

  if (sf->ops->CreateEmbeddedRootSF) {
    ierr = (*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf);CHKERRQ(ierr);
  } else {
    /* A generic version of creating embedded sf */
    ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
    maxlocal = maxleaf - minleaf + 1;
    ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr);
    leafdata = leafmem - minleaf; /* shift so leafdata can be indexed by absolute leaf index */
    /* Tag selected roots and bcast to leaves */
    for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
    ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);

    /* Build esf with leaves that are still connected */
    esf_nleaves = 0;
    for (i=0; i<nleaves; i++) {
      j = ilocal ? ilocal[i] : i;
      /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
         with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
      */
      esf_nleaves += (leafdata[j] ? 1 : 0);
    }
    ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr);
    for (i=n=0; i<nleaves; i++) {
      j = ilocal ?
ilocal[i] : i; 1356 if (leafdata[j]) { 1357 new_ilocal[n] = j; 1358 new_iremote[n].rank = iremote[i].rank; 1359 new_iremote[n].index = iremote[i].index; 1360 ++n; 1361 } 1362 } 1363 ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr); 1364 ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr); 1365 ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1366 ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr); 1367 } 1368 ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1369 PetscFunctionReturn(0); 1370 } 1371 1372 /*@C 1373 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1374 1375 Collective 1376 1377 Input Parameters: 1378 + sf - original star forest 1379 . nselected - number of selected leaves on this process 1380 - selected - indices of the selected leaves on this process 1381 1382 Output Parameter: 1383 . newsf - new star forest 1384 1385 Level: advanced 1386 1387 .seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph() 1388 @*/ 1389 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1390 { 1391 const PetscSFNode *iremote; 1392 PetscSFNode *new_iremote; 1393 const PetscInt *ilocal; 1394 PetscInt i,nroots,*leaves,*new_ilocal; 1395 MPI_Comm comm; 1396 PetscErrorCode ierr; 1397 1398 PetscFunctionBegin; 1399 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1400 PetscSFCheckGraphSet(sf,1); 1401 if (nselected) PetscValidPointer(selected,3); 1402 PetscValidPointer(newsf,4); 1403 1404 /* Uniq selected[] and put results in leaves[] */ 1405 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1406 ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr); 1407 ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr); 1408 ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr); 1409 if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= 
sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves); 1410 1411 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1412 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) { 1413 ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr); 1414 } else { 1415 ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr); 1416 ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr); 1417 ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr); 1418 for (i=0; i<nselected; ++i) { 1419 const PetscInt l = leaves[i]; 1420 new_ilocal[i] = ilocal ? ilocal[l] : l; 1421 new_iremote[i].rank = iremote[l].rank; 1422 new_iremote[i].index = iremote[l].index; 1423 } 1424 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr); 1425 ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1426 } 1427 ierr = PetscFree(leaves);CHKERRQ(ierr); 1428 PetscFunctionReturn(0); 1429 } 1430 1431 /*@C 1432 PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd() 1433 1434 Collective on PetscSF 1435 1436 Input Parameters: 1437 + sf - star forest on which to communicate 1438 . unit - data type associated with each node 1439 . rootdata - buffer to broadcast 1440 - op - operation to use for reduction 1441 1442 Output Parameter: 1443 . leafdata - buffer to be reduced with values from each leaf's respective root 1444 1445 Level: intermediate 1446 1447 Notes: 1448 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1449 are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1450 use PetscSFBcastWithMemTypeBegin() instead. 
.seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin()
@*/
PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Suppress logging when this call is an internal part of a VecScatter, which logs itself */
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
  /* Detect host vs device memory for both buffers; see PetscSFBcastWithMemTypeBegin() to skip this */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd()

   Collective on PetscSF

   Input Parameters:
+  sf - star forest on which to communicate
.  unit - data type associated with each node
.  rootmtype - memory type of rootdata
.  rootdata - buffer to broadcast
.  leafmtype - memory type of leafdata
-  op - operation to use for reduction

   Output Parameter:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFBcastEnd(), PetscSFBcastBegin()
@*/
PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
  /* Caller supplied the memory types, so no PetscGetMemType() probing is needed here */
  ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Parameter:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFReduceEnd()
@*/
PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);}
  ierr = (*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Parameter:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

   Notes:
   When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
   are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should
   use PetscSFReduceWithMemTypeBegin() instead.

.seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
@*/
PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
  /* Detect host vs device memory; see PetscSFReduceWithMemTypeBegin() to skip this probing */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafmtype - memory type of leafdata
.  leafdata - values to reduce
.  rootmtype - memory type of rootdata
-  op - reduction operation

   Output Parameter:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
@*/
PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
  ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Parameter:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);}
  ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
   integers.

.seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype,leafupdatemtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
  /* Implementations take a single leaf memtype, so both leaf buffers must live in the same space */
  if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
  ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1723 1724 .seealso: PetscSFGatherBegin() 1725 @*/ 1726 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree) 1727 { 1728 PetscErrorCode ierr; 1729 1730 PetscFunctionBegin; 1731 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1732 PetscSFCheckGraphSet(sf,1); 1733 PetscValidPointer(degree,2); 1734 if (!sf->degreeknown) { 1735 PetscInt i, nroots = sf->nroots, maxlocal; 1736 if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1737 maxlocal = sf->maxleaf-sf->minleaf+1; 1738 ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr); 1739 ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1740 for (i=0; i<nroots; i++) sf->degree[i] = 0; 1741 for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1; 1742 ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr); 1743 } 1744 *degree = NULL; 1745 PetscFunctionReturn(0); 1746 } 1747 1748 /*@C 1749 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin() 1750 1751 Collective 1752 1753 Input Parameter: 1754 . sf - star forest 1755 1756 Output Parameter: 1757 . degree - degree of each root vertex 1758 1759 Level: developer 1760 1761 Notes: 1762 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 
1763 1764 .seealso: 1765 @*/ 1766 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree) 1767 { 1768 PetscErrorCode ierr; 1769 1770 PetscFunctionBegin; 1771 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1772 PetscSFCheckGraphSet(sf,1); 1773 PetscValidPointer(degree,2); 1774 if (!sf->degreeknown) { 1775 if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 1776 ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr); 1777 ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr); 1778 sf->degreeknown = PETSC_TRUE; 1779 } 1780 *degree = sf->degree; 1781 PetscFunctionReturn(0); 1782 } 1783 1784 /*@C 1785 PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()). 1786 Each multi-root is assigned index of the corresponding original root. 1787 1788 Collective 1789 1790 Input Parameters: 1791 + sf - star forest 1792 - degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd() 1793 1794 Output Parameters: 1795 + nMultiRoots - (optional) number of multi-roots (roots of multi-SF) 1796 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots 1797 1798 Level: developer 1799 1800 Notes: 1801 The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed. 
1802 1803 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF() 1804 @*/ 1805 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1806 { 1807 PetscSF msf; 1808 PetscInt i, j, k, nroots, nmroots; 1809 PetscErrorCode ierr; 1810 1811 PetscFunctionBegin; 1812 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1813 ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 1814 if (nroots) PetscValidIntPointer(degree,2); 1815 if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3); 1816 PetscValidPointer(multiRootsOrigNumbering,4); 1817 ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr); 1818 ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr); 1819 ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr); 1820 for (i=0,j=0,k=0; i<nroots; i++) { 1821 if (!degree[i]) continue; 1822 for (j=0; j<degree[i]; j++,k++) { 1823 (*multiRootsOrigNumbering)[k] = i; 1824 } 1825 } 1826 if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail"); 1827 if (nMultiRoots) *nMultiRoots = nmroots; 1828 PetscFunctionReturn(0); 1829 } 1830 1831 /*@C 1832 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 1833 1834 Collective 1835 1836 Input Parameters: 1837 + sf - star forest 1838 . unit - data type 1839 - leafdata - leaf data to gather to roots 1840 1841 Output Parameter: 1842 . 
multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1843 1844 Level: intermediate 1845 1846 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1847 @*/ 1848 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1849 { 1850 PetscErrorCode ierr; 1851 PetscSF multi = NULL; 1852 1853 PetscFunctionBegin; 1854 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1855 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1856 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1857 ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr); 1858 PetscFunctionReturn(0); 1859 } 1860 1861 /*@C 1862 PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin() 1863 1864 Collective 1865 1866 Input Parameters: 1867 + sf - star forest 1868 . unit - data type 1869 - leafdata - leaf data to gather to roots 1870 1871 Output Parameter: 1872 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1873 1874 Level: intermediate 1875 1876 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1877 @*/ 1878 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1879 { 1880 PetscErrorCode ierr; 1881 PetscSF multi = NULL; 1882 1883 PetscFunctionBegin; 1884 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1885 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1886 ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr); 1887 PetscFunctionReturn(0); 1888 } 1889 1890 /*@C 1891 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd() 1892 1893 Collective 1894 1895 Input Parameters: 1896 + sf - star forest 1897 . unit - data type 1898 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1899 1900 Output Parameter: 1901 . 
leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterEnd(), PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* A scatter is a broadcast over the multi-SF: each leaf receives its personal entry from the multi-root buffer */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Parameter:
.
leafdata - leaf data to be update with personal data from each respective root 1932 1933 Level: intermediate 1934 1935 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1936 @*/ 1937 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1938 { 1939 PetscErrorCode ierr; 1940 PetscSF multi = NULL; 1941 1942 PetscFunctionBegin; 1943 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1944 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1945 ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 1946 PetscFunctionReturn(0); 1947 } 1948 1949 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 1950 { 1951 PetscInt i, n, nleaves; 1952 const PetscInt *ilocal = NULL; 1953 PetscHSetI seen; 1954 PetscErrorCode ierr; 1955 1956 PetscFunctionBegin; 1957 if (PetscDefined(USE_DEBUG)) { 1958 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 1959 ierr = PetscHSetICreate(&seen);CHKERRQ(ierr); 1960 for (i = 0; i < nleaves; i++) { 1961 const PetscInt leaf = ilocal ? ilocal[i] : i; 1962 ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr); 1963 } 1964 ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr); 1965 if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique"); 1966 ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr); 1967 } 1968 PetscFunctionReturn(0); 1969 } 1970 1971 /*@ 1972 PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view 1973 1974 Input Parameters: 1975 + sfA - The first PetscSF 1976 - sfB - The second PetscSF 1977 1978 Output Parameters: 1979 . sfBA - The composite SF 1980 1981 Level: developer 1982 1983 Notes: 1984 Currently, the two SFs must be defined on congruent communicators and they must be true star 1985 forests, i.e. the same leaf is not connected with different roots. 
1986 1987 sfA's leaf space and sfB's root space might be partially overlapped. The composition builds 1988 a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected 1989 nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a 1990 Bcast on sfA, then a Bcast on sfB, on connected nodes. 1991 1992 .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph() 1993 @*/ 1994 PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA) 1995 { 1996 PetscErrorCode ierr; 1997 const PetscSFNode *remotePointsA,*remotePointsB; 1998 PetscSFNode *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB; 1999 const PetscInt *localPointsA,*localPointsB; 2000 PetscInt *localPointsBA; 2001 PetscInt i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA; 2002 PetscBool denseB; 2003 2004 PetscFunctionBegin; 2005 PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1); 2006 PetscSFCheckGraphSet(sfA,1); 2007 PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2); 2008 PetscSFCheckGraphSet(sfB,2); 2009 PetscCheckSameComm(sfA,1,sfB,2); 2010 PetscValidPointer(sfBA,3); 2011 ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr); 2012 ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr); 2013 2014 ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr); 2015 ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr); 2016 if (localPointsA) { 2017 ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr); 2018 for (i=0; i<numRootsB; i++) { 2019 reorderedRemotePointsA[i].rank = -1; 2020 reorderedRemotePointsA[i].index = -1; 2021 } 2022 for (i=0; i<numLeavesA; i++) { 2023 if (localPointsA[i] >= numRootsB) continue; 2024 reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i]; 2025 } 2026 remotePointsA = reorderedRemotePointsA; 2027 } 2028 ierr = 
PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr); 2029 ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr); 2030 ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr); 2031 ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr); 2032 ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr); 2033 2034 denseB = (PetscBool)!localPointsB; 2035 for (i=0,numLeavesBA=0; i<numLeavesB; i++) { 2036 if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE; 2037 else numLeavesBA++; 2038 } 2039 if (denseB) { 2040 localPointsBA = NULL; 2041 remotePointsBA = leafdataB; 2042 } else { 2043 ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr); 2044 ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr); 2045 for (i=0,numLeavesBA=0; i<numLeavesB; i++) { 2046 const PetscInt l = localPointsB ? localPointsB[i] : i; 2047 2048 if (leafdataB[l-minleaf].rank == -1) continue; 2049 remotePointsBA[numLeavesBA] = leafdataB[l-minleaf]; 2050 localPointsBA[numLeavesBA] = l; 2051 numLeavesBA++; 2052 } 2053 ierr = PetscFree(leafdataB);CHKERRQ(ierr); 2054 } 2055 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr); 2056 ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr); 2057 ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr); 2058 PetscFunctionReturn(0); 2059 } 2060 2061 /*@ 2062 PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one 2063 2064 Input Parameters: 2065 + sfA - The first PetscSF 2066 - sfB - The second PetscSF 2067 2068 Output Parameters: 2069 . sfBA - The composite SF. 2070 2071 Level: developer 2072 2073 Notes: 2074 Currently, the two SFs must be defined on congruent communicators and they must be true star 2075 forests, i.e. the same leaf is not connected with different roots. 
Even more, all roots of the 2076 second SF must have a degree of 1, i.e., no roots have more than one leaf connected. 2077 2078 sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds 2079 a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected 2080 roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then 2081 a Reduce on sfB, on connected roots. 2082 2083 .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF() 2084 @*/ 2085 PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA) 2086 { 2087 PetscErrorCode ierr; 2088 const PetscSFNode *remotePointsA,*remotePointsB; 2089 PetscSFNode *remotePointsBA; 2090 const PetscInt *localPointsA,*localPointsB; 2091 PetscSFNode *reorderedRemotePointsA = NULL; 2092 PetscInt i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA; 2093 MPI_Op op; 2094 #if defined(PETSC_USE_64BIT_INDICES) 2095 PetscBool iswin; 2096 #endif 2097 2098 PetscFunctionBegin; 2099 PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1); 2100 PetscSFCheckGraphSet(sfA,1); 2101 PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2); 2102 PetscSFCheckGraphSet(sfB,2); 2103 PetscCheckSameComm(sfA,1,sfB,2); 2104 PetscValidPointer(sfBA,3); 2105 ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr); 2106 ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr); 2107 2108 ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr); 2109 ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr); 2110 2111 /* TODO: Check roots of sfB have degree of 1 */ 2112 /* Once we implement it, we can replace the MPI_MAXLOC 2113 with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect. 
2114 We use MPI_MAXLOC only to have a deterministic output from this routine if 2115 the root condition is not meet. 2116 */ 2117 op = MPI_MAXLOC; 2118 #if defined(PETSC_USE_64BIT_INDICES) 2119 /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */ 2120 ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr); 2121 if (iswin) op = MPI_REPLACE; 2122 #endif 2123 2124 ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr); 2125 ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr); 2126 for (i=0; i<maxleaf - minleaf + 1; i++) { 2127 reorderedRemotePointsA[i].rank = -1; 2128 reorderedRemotePointsA[i].index = -1; 2129 } 2130 if (localPointsA) { 2131 for (i=0; i<numLeavesA; i++) { 2132 if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; 2133 reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i]; 2134 } 2135 } else { 2136 for (i=0; i<numLeavesA; i++) { 2137 if (i > maxleaf || i < minleaf) continue; 2138 reorderedRemotePointsA[i - minleaf] = remotePointsA[i]; 2139 } 2140 } 2141 2142 ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr); 2143 ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr); 2144 for (i=0; i<numRootsB; i++) { 2145 remotePointsBA[i].rank = -1; 2146 remotePointsBA[i].index = -1; 2147 } 2148 2149 ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr); 2150 ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr); 2151 ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr); 2152 for (i=0,numLeavesBA=0; i<numRootsB; i++) { 2153 if (remotePointsBA[i].rank == -1) continue; 2154 remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank; 2155 remotePointsBA[numLeavesBA].index = remotePointsBA[i].index; 2156 localPointsBA[numLeavesBA] = i; 2157 numLeavesBA++; 2158 } 2159 ierr = 
PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr); 2160 ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr); 2161 ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr); 2162 PetscFunctionReturn(0); 2163 } 2164 2165 /* 2166 PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF 2167 2168 Input Parameters: 2169 . sf - The global PetscSF 2170 2171 Output Parameters: 2172 . out - The local PetscSF 2173 */ 2174 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out) 2175 { 2176 MPI_Comm comm; 2177 PetscMPIInt myrank; 2178 const PetscInt *ilocal; 2179 const PetscSFNode *iremote; 2180 PetscInt i,j,nroots,nleaves,lnleaves,*lilocal; 2181 PetscSFNode *liremote; 2182 PetscSF lsf; 2183 PetscErrorCode ierr; 2184 2185 PetscFunctionBegin; 2186 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 2187 if (sf->ops->CreateLocalSF) { 2188 ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr); 2189 } else { 2190 /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */ 2191 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 2192 ierr = MPI_Comm_rank(comm,&myrank);CHKERRMPI(ierr); 2193 2194 /* Find out local edges and build a local SF */ 2195 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 2196 for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;} 2197 ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr); 2198 ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr); 2199 2200 for (i=j=0; i<nleaves; i++) { 2201 if (iremote[i].rank == (PetscInt)myrank) { 2202 lilocal[j] = ilocal? 
ilocal[i] : i; /* ilocal=NULL for contiguous storage */ 2203 liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */ 2204 liremote[j].index = iremote[i].index; 2205 j++; 2206 } 2207 } 2208 ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr); 2209 ierr = PetscSFSetFromOptions(lsf);CHKERRQ(ierr); 2210 ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 2211 ierr = PetscSFSetUp(lsf);CHKERRQ(ierr); 2212 *out = lsf; 2213 } 2214 PetscFunctionReturn(0); 2215 } 2216 2217 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */ 2218 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata) 2219 { 2220 PetscErrorCode ierr; 2221 PetscMemType rootmtype,leafmtype; 2222 2223 PetscFunctionBegin; 2224 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 2225 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 2226 ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr); 2227 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 2228 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 2229 if (sf->ops->BcastToZero) { 2230 ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr); 2231 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type"); 2232 ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr); 2233 PetscFunctionReturn(0); 2234 } 2235 2236