1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/hashseti.h> 3 #include <petsc/private/viewerimpl.h> 4 #include <petscctable.h> 5 6 #if defined(PETSC_HAVE_CUDA) 7 #include <cuda_runtime.h> 8 #endif 9 10 #if defined(PETSC_HAVE_HIP) 11 #include <hip/hip_runtime.h> 12 #endif 13 14 #if defined(PETSC_CLANG_STATIC_ANALYZER) 15 void PetscSFCheckGraphSet(PetscSF,int); 16 #else 17 #if defined(PETSC_USE_DEBUG) 18 # define PetscSFCheckGraphSet(sf,arg) do { \ 19 if (PetscUnlikely(!(sf)->graphset)) \ 20 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \ 21 } while (0) 22 #else 23 # define PetscSFCheckGraphSet(sf,arg) do {} while (0) 24 #endif 25 #endif 26 27 const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL}; 28 29 PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *type) 30 { 31 PetscFunctionBegin; 32 PetscValidPointer(type,2); 33 *type = PETSC_MEMTYPE_HOST; 34 #if defined(PETSC_HAVE_CUDA) 35 if (PetscDeviceInitialized(PETSC_DEVICE_CUDA) && data) { 36 cudaError_t cerr; 37 struct cudaPointerAttributes attr; 38 enum cudaMemoryType mtype; 39 cerr = cudaPointerGetAttributes(&attr,data); /* Do not check error since before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */ 40 cudaGetLastError(); /* Reset the last error */ 41 #if (CUDART_VERSION < 10000) 42 mtype = attr.memoryType; 43 #else 44 mtype = attr.type; 45 #endif 46 if (cerr == cudaSuccess && mtype == cudaMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE; 47 } 48 #endif 49 50 #if defined(PETSC_HAVE_HIP) 51 if (PetscDeviceInitialized(PETSC_DEVICE_HIP) && data) { 52 hipError_t cerr; 53 struct hipPointerAttribute_t attr; 54 enum hipMemoryType mtype; 55 cerr = hipPointerGetAttributes(&attr,data); 56 hipGetLastError(); /* Reset the last error */ 57 mtype 
= attr.memoryType; 58 if (cerr == hipSuccess && mtype == hipMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE; 59 } 60 #endif 61 PetscFunctionReturn(0); 62 } 63 64 /*@ 65 PetscSFCreate - create a star forest communication context 66 67 Collective 68 69 Input Parameter: 70 . comm - communicator on which the star forest will operate 71 72 Output Parameter: 73 . sf - new star forest context 74 75 Options Database Keys: 76 + -sf_type basic -Use MPI persistent Isend/Irecv for communication (Default) 77 . -sf_type window -Use MPI-3 one-sided window for communication 78 - -sf_type neighbor -Use MPI-3 neighborhood collectives for communication 79 80 Level: intermediate 81 82 Notes: 83 When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv, 84 MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special 85 SFs are optimized and they have better performance than general SFs. 86 87 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy() 88 @*/ 89 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf) 90 { 91 PetscErrorCode ierr; 92 PetscSF b; 93 94 PetscFunctionBegin; 95 PetscValidPointer(sf,2); 96 ierr = PetscSFInitializePackage();CHKERRQ(ierr); 97 98 ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr); 99 100 b->nroots = -1; 101 b->nleaves = -1; 102 b->minleaf = PETSC_MAX_INT; 103 b->maxleaf = PETSC_MIN_INT; 104 b->nranks = -1; 105 b->rankorder = PETSC_TRUE; 106 b->ingroup = MPI_GROUP_NULL; 107 b->outgroup = MPI_GROUP_NULL; 108 b->graphset = PETSC_FALSE; 109 #if defined(PETSC_HAVE_DEVICE) 110 b->use_gpu_aware_mpi = use_gpu_aware_mpi; 111 b->use_stream_aware_mpi = PETSC_FALSE; 112 b->unknown_input_stream= PETSC_FALSE; 113 #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 114 b->backend = PETSCSF_BACKEND_KOKKOS; 115 #elif defined(PETSC_HAVE_CUDA) 116 b->backend = 
PETSCSF_BACKEND_CUDA; 117 #elif defined(PETSC_HAVE_HIP) 118 b->backend = PETSCSF_BACKEND_HIP; 119 #endif 120 121 #if defined(PETSC_HAVE_NVSHMEM) 122 b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */ 123 b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */ 124 ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL);CHKERRQ(ierr); 125 ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL);CHKERRQ(ierr); 126 #endif 127 #endif 128 b->vscat.from_n = -1; 129 b->vscat.to_n = -1; 130 b->vscat.unit = MPIU_SCALAR; 131 *sf = b; 132 PetscFunctionReturn(0); 133 } 134 135 /*@ 136 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 137 138 Collective 139 140 Input Parameter: 141 . sf - star forest 142 143 Level: advanced 144 145 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy() 146 @*/ 147 PetscErrorCode PetscSFReset(PetscSF sf) 148 { 149 PetscErrorCode ierr; 150 151 PetscFunctionBegin; 152 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 153 if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);} 154 sf->nroots = -1; 155 sf->nleaves = -1; 156 sf->minleaf = PETSC_MAX_INT; 157 sf->maxleaf = PETSC_MIN_INT; 158 sf->mine = NULL; 159 sf->remote = NULL; 160 sf->graphset = PETSC_FALSE; 161 ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr); 162 ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr); 163 sf->nranks = -1; 164 ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr); 165 sf->degreeknown = PETSC_FALSE; 166 ierr = PetscFree(sf->degree);CHKERRQ(ierr); 167 if (sf->ingroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRMPI(ierr);} 168 if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRMPI(ierr);} 169 if (sf->multi) sf->multi->multi = NULL; 170 ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr); 171 ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr); 172 173 #if 
defined(PETSC_HAVE_DEVICE) 174 for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);CHKERRQ(ierr);} 175 #endif 176 177 sf->setupcalled = PETSC_FALSE; 178 PetscFunctionReturn(0); 179 } 180 181 /*@C 182 PetscSFSetType - Set the PetscSF communication implementation 183 184 Collective on PetscSF 185 186 Input Parameters: 187 + sf - the PetscSF context 188 - type - a known method 189 190 Options Database Key: 191 . -sf_type <type> - Sets the method; use -help for a list 192 of available methods (for instance, window, basic, neighbor) 193 194 Notes: 195 See "include/petscsf.h" for available methods (for instance) 196 + PETSCSFWINDOW - MPI-2/3 one-sided 197 - PETSCSFBASIC - basic implementation using MPI-1 two-sided 198 199 Level: intermediate 200 201 .seealso: PetscSFType, PetscSFCreate() 202 @*/ 203 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type) 204 { 205 PetscErrorCode ierr,(*r)(PetscSF); 206 PetscBool match; 207 208 PetscFunctionBegin; 209 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 210 PetscValidCharPointer(type,2); 211 212 ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr); 213 if (match) PetscFunctionReturn(0); 214 215 ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr); 216 if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type); 217 /* Destroy the previous PetscSF implementation context */ 218 if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);} 219 ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr); 220 ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr); 221 ierr = (*r)(sf);CHKERRQ(ierr); 222 PetscFunctionReturn(0); 223 } 224 225 /*@C 226 PetscSFGetType - Get the PetscSF communication implementation 227 228 Not Collective 229 230 Input Parameter: 231 . sf - the PetscSF context 232 233 Output Parameter: 234 . 
type - the PetscSF type name 235 236 Level: intermediate 237 238 .seealso: PetscSFSetType(), PetscSFCreate() 239 @*/ 240 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 241 { 242 PetscFunctionBegin; 243 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1); 244 PetscValidPointer(type,2); 245 *type = ((PetscObject)sf)->type_name; 246 PetscFunctionReturn(0); 247 } 248 249 /*@C 250 PetscSFDestroy - destroy star forest 251 252 Collective 253 254 Input Parameter: 255 . sf - address of star forest 256 257 Level: intermediate 258 259 .seealso: PetscSFCreate(), PetscSFReset() 260 @*/ 261 PetscErrorCode PetscSFDestroy(PetscSF *sf) 262 { 263 PetscErrorCode ierr; 264 265 PetscFunctionBegin; 266 if (!*sf) PetscFunctionReturn(0); 267 PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1); 268 if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);} 269 ierr = PetscSFReset(*sf);CHKERRQ(ierr); 270 if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);} 271 ierr = PetscSFDestroy(&(*sf)->vscat.lsf);CHKERRQ(ierr); 272 if ((*sf)->vscat.bs > 1) {ierr = MPI_Type_free(&(*sf)->vscat.unit);CHKERRMPI(ierr);} 273 ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr); 274 PetscFunctionReturn(0); 275 } 276 277 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 278 { 279 PetscInt i, nleaves; 280 PetscMPIInt size; 281 const PetscInt *ilocal; 282 const PetscSFNode *iremote; 283 PetscErrorCode ierr; 284 285 PetscFunctionBegin; 286 if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0); 287 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 288 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr); 289 for (i = 0; i < nleaves; i++) { 290 const PetscInt rank = iremote[i].rank; 291 const PetscInt remote = iremote[i].index; 292 const PetscInt leaf = ilocal ? 
ilocal[i] : i; 293 if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)",rank,i,size); 294 if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0",remote,i); 295 if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0",leaf,i); 296 } 297 PetscFunctionReturn(0); 298 } 299 300 /*@ 301 PetscSFSetUp - set up communication structures 302 303 Collective 304 305 Input Parameter: 306 . sf - star forest communication object 307 308 Level: beginner 309 310 .seealso: PetscSFSetFromOptions(), PetscSFSetType() 311 @*/ 312 PetscErrorCode PetscSFSetUp(PetscSF sf) 313 { 314 PetscErrorCode ierr; 315 316 PetscFunctionBegin; 317 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 318 PetscSFCheckGraphSet(sf,1); 319 if (sf->setupcalled) PetscFunctionReturn(0); 320 ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 321 ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr); 322 if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} /* Zero all sf->ops */ 323 if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);} 324 #if defined(PETSC_HAVE_CUDA) 325 if (sf->backend == PETSCSF_BACKEND_CUDA) { 326 sf->ops->Malloc = PetscSFMalloc_CUDA; 327 sf->ops->Free = PetscSFFree_CUDA; 328 } 329 #endif 330 #if defined(PETSC_HAVE_HIP) 331 if (sf->backend == PETSCSF_BACKEND_HIP) { 332 sf->ops->Malloc = PetscSFMalloc_HIP; 333 sf->ops->Free = PetscSFFree_HIP; 334 } 335 #endif 336 337 # 338 #if defined(PETSC_HAVE_KOKKOS) 339 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 340 sf->ops->Malloc = PetscSFMalloc_Kokkos; 341 sf->ops->Free = PetscSFFree_Kokkos; 342 } 343 #endif 344 ierr = 
PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 345 sf->setupcalled = PETSC_TRUE; 346 PetscFunctionReturn(0); 347 } 348 349 /*@ 350 PetscSFSetFromOptions - set PetscSF options using the options database 351 352 Logically Collective 353 354 Input Parameter: 355 . sf - star forest 356 357 Options Database Keys: 358 + -sf_type - implementation type, see PetscSFSetType() 359 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 360 . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also 361 use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true). 362 If true, this option only works with -use_gpu_aware_mpi 1. 363 . -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false). 364 If true, this option only works with -use_gpu_aware_mpi 1. 365 366 - -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos. 367 On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices, 368 the only available is kokkos. 369 370 Level: intermediate 371 @*/ 372 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 373 { 374 PetscSFType deft; 375 char type[256]; 376 PetscErrorCode ierr; 377 PetscBool flg; 378 379 PetscFunctionBegin; 380 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 381 ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr); 382 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 383 ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr); 384 ierr = PetscSFSetType(sf,flg ? 
type : deft);CHKERRQ(ierr); 385 ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr); 386 #if defined(PETSC_HAVE_DEVICE) 387 { 388 char backendstr[32] = {0}; 389 PetscBool isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set; 390 /* Change the defaults set in PetscSFCreate() with command line options */ 391 ierr = PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL);CHKERRQ(ierr); 392 ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr); 393 ierr = PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);CHKERRQ(ierr); 394 ierr = PetscStrcasecmp("cuda",backendstr,&isCuda);CHKERRQ(ierr); 395 ierr = PetscStrcasecmp("kokkos",backendstr,&isKokkos);CHKERRQ(ierr); 396 ierr = PetscStrcasecmp("hip",backendstr,&isHip);CHKERRQ(ierr); 397 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 398 if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 399 else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 400 else if (isHip) sf->backend = PETSCSF_BACKEND_HIP; 401 else if (set) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr); 402 #elif defined(PETSC_HAVE_KOKKOS) 403 if (set && !isKokkos) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. 
You can only choose kokkos", backendstr); 404 #endif 405 } 406 #endif 407 if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);} 408 ierr = PetscOptionsEnd();CHKERRQ(ierr); 409 PetscFunctionReturn(0); 410 } 411 412 /*@ 413 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 414 415 Logically Collective 416 417 Input Parameters: 418 + sf - star forest 419 - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic) 420 421 Level: advanced 422 423 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin() 424 @*/ 425 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg) 426 { 427 PetscFunctionBegin; 428 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 429 PetscValidLogicalCollectiveBool(sf,flg,2); 430 if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 431 sf->rankorder = flg; 432 PetscFunctionReturn(0); 433 } 434 435 /*@C 436 PetscSFSetGraph - Set a parallel star forest 437 438 Collective 439 440 Input Parameters: 441 + sf - star forest 442 . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 443 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 444 . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced 445 during setup in debug mode) 446 . localmode - copy mode for ilocal 447 . 
iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 448 during setup in debug mode) 449 - remotemode - copy mode for iremote 450 451 Level: intermediate 452 453 Notes: 454 In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode 455 456 Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they 457 encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not 458 needed 459 460 Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf 461 indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode). 462 463 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph() 464 @*/ 465 PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode) 466 { 467 PetscErrorCode ierr; 468 469 PetscFunctionBegin; 470 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 471 if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4); 472 if (nleaves > 0) PetscValidPointer(iremote,6); 473 if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %" PetscInt_FMT ", cannot be negative",nroots); 474 if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %" PetscInt_FMT ", cannot be negative",nleaves); 475 476 if (sf->nroots >= 0) { /* Reset only if graph already set */ 477 ierr = PetscSFReset(sf);CHKERRQ(ierr); 478 } 479 480 ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 481 482 sf->nroots = nroots; 483 sf->nleaves = nleaves; 484 485 if (nleaves && ilocal) { 486 PetscInt i; 487 PetscInt minleaf = PETSC_MAX_INT; 488 PetscInt maxleaf = PETSC_MIN_INT; 489 int contiguous = 1; 490 for (i=0; i<nleaves; i++) { 491 minleaf = 
PetscMin(minleaf,ilocal[i]); 492 maxleaf = PetscMax(maxleaf,ilocal[i]); 493 contiguous &= (ilocal[i] == i); 494 } 495 sf->minleaf = minleaf; 496 sf->maxleaf = maxleaf; 497 if (contiguous) { 498 if (localmode == PETSC_OWN_POINTER) { 499 ierr = PetscFree(ilocal);CHKERRQ(ierr); 500 } 501 ilocal = NULL; 502 } 503 } else { 504 sf->minleaf = 0; 505 sf->maxleaf = nleaves - 1; 506 } 507 508 if (ilocal) { 509 switch (localmode) { 510 case PETSC_COPY_VALUES: 511 ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr); 512 ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr); 513 sf->mine = sf->mine_alloc; 514 break; 515 case PETSC_OWN_POINTER: 516 sf->mine_alloc = (PetscInt*)ilocal; 517 sf->mine = sf->mine_alloc; 518 break; 519 case PETSC_USE_POINTER: 520 sf->mine_alloc = NULL; 521 sf->mine = (PetscInt*)ilocal; 522 break; 523 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode"); 524 } 525 } 526 527 switch (remotemode) { 528 case PETSC_COPY_VALUES: 529 ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr); 530 ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr); 531 sf->remote = sf->remote_alloc; 532 break; 533 case PETSC_OWN_POINTER: 534 sf->remote_alloc = (PetscSFNode*)iremote; 535 sf->remote = sf->remote_alloc; 536 break; 537 case PETSC_USE_POINTER: 538 sf->remote_alloc = NULL; 539 sf->remote = (PetscSFNode*)iremote; 540 break; 541 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode"); 542 } 543 544 ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 545 sf->graphset = PETSC_TRUE; 546 PetscFunctionReturn(0); 547 } 548 549 /*@ 550 PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern 551 552 Collective 553 554 Input Parameters: 555 + sf - The PetscSF 556 . 
map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL) 557 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL 558 559 Notes: 560 It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map. 561 n and N are local and global sizes of x respectively. 562 563 With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to 564 sequential vectors y on all ranks. 565 566 With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a 567 sequential vector y on rank 0. 568 569 In above cases, entries of x are roots and entries of y are leaves. 570 571 With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine 572 creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 573 of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does 574 not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data 575 items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines. 576 577 In this case, roots and leaves are symmetric. 
578 579 Level: intermediate 580 @*/ 581 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern) 582 { 583 MPI_Comm comm; 584 PetscInt n,N,res[2]; 585 PetscMPIInt rank,size; 586 PetscSFType type; 587 PetscErrorCode ierr; 588 589 PetscFunctionBegin; 590 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 591 if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map,2); 592 ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr); 593 if (PetscUnlikely((pattern < PETSCSF_PATTERN_ALLGATHER) || (pattern > PETSCSF_PATTERN_ALLTOALL))) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %d",pattern); 594 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 595 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 596 597 if (pattern == PETSCSF_PATTERN_ALLTOALL) { 598 type = PETSCSFALLTOALL; 599 ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr); 600 ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr); 601 ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr); 602 ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr); 603 } else { 604 ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr); 605 ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr); 606 res[0] = n; 607 res[1] = -n; 608 /* Check if n are same over all ranks so that we can optimize it */ 609 ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRMPI(ierr); 610 if (res[0] == -res[1]) { /* same n */ 611 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 612 } else { 613 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 614 } 615 ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr); 616 } 617 ierr = PetscSFSetType(sf,type);CHKERRQ(ierr); 618 619 sf->pattern = pattern; 620 sf->mine = NULL; /* Contiguous */ 621 622 /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 623 Also set other easy stuff. 
624 */ 625 if (pattern == PETSCSF_PATTERN_ALLGATHER) { 626 sf->nleaves = N; 627 sf->nroots = n; 628 sf->nranks = size; 629 sf->minleaf = 0; 630 sf->maxleaf = N - 1; 631 } else if (pattern == PETSCSF_PATTERN_GATHER) { 632 sf->nleaves = rank ? 0 : N; 633 sf->nroots = n; 634 sf->nranks = rank ? 0 : size; 635 sf->minleaf = 0; 636 sf->maxleaf = rank ? -1 : N - 1; 637 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 638 sf->nleaves = size; 639 sf->nroots = size; 640 sf->nranks = size; 641 sf->minleaf = 0; 642 sf->maxleaf = size - 1; 643 } 644 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 645 sf->graphset = PETSC_TRUE; 646 PetscFunctionReturn(0); 647 } 648 649 /*@ 650 PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map 651 652 Collective 653 654 Input Parameter: 655 . sf - star forest to invert 656 657 Output Parameter: 658 . isf - inverse of sf 659 660 Level: advanced 661 662 Notes: 663 All roots must have degree 1. 664 665 The local space may be a permutation, but cannot be sparse. 
666 667 .seealso: PetscSFSetGraph() 668 @*/ 669 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf) 670 { 671 PetscErrorCode ierr; 672 PetscMPIInt rank; 673 PetscInt i,nroots,nleaves,maxlocal,count,*newilocal; 674 const PetscInt *ilocal; 675 PetscSFNode *roots,*leaves; 676 677 PetscFunctionBegin; 678 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 679 PetscSFCheckGraphSet(sf,1); 680 PetscValidPointer(isf,2); 681 682 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 683 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 684 685 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 686 ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr); 687 for (i=0; i<maxlocal; i++) { 688 leaves[i].rank = rank; 689 leaves[i].index = i; 690 } 691 for (i=0; i <nroots; i++) { 692 roots[i].rank = -1; 693 roots[i].index = -1; 694 } 695 ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr); 696 ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr); 697 698 /* Check whether our leaves are sparse */ 699 for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++; 700 if (count == nroots) newilocal = NULL; 701 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). 
*/ 702 ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr); 703 for (i=0,count=0; i<nroots; i++) { 704 if (roots[i].rank >= 0) { 705 newilocal[count] = i; 706 roots[count].rank = roots[i].rank; 707 roots[count].index = roots[i].index; 708 count++; 709 } 710 } 711 } 712 713 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr); 714 ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr); 715 ierr = PetscFree2(roots,leaves);CHKERRQ(ierr); 716 PetscFunctionReturn(0); 717 } 718 719 /*@ 720 PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph 721 722 Collective 723 724 Input Parameters: 725 + sf - communication object to duplicate 726 - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption) 727 728 Output Parameter: 729 . newsf - new communication object 730 731 Level: beginner 732 733 .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph() 734 @*/ 735 PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf) 736 { 737 PetscSFType type; 738 PetscErrorCode ierr; 739 MPI_Datatype dtype=MPIU_SCALAR; 740 741 PetscFunctionBegin; 742 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 743 PetscValidLogicalCollectiveEnum(sf,opt,2); 744 PetscValidPointer(newsf,3); 745 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr); 746 ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 747 if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);} 748 if (opt == PETSCSF_DUPLICATE_GRAPH) { 749 PetscSFCheckGraphSet(sf,1); 750 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 751 PetscInt nroots,nleaves; 752 const PetscInt *ilocal; 753 const PetscSFNode *iremote; 754 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 755 ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr); 756 } else { 757 ierr 
= PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr); 758 } 759 } 760 /* Since oldtype is committed, so is newtype, according to MPI */ 761 if (sf->vscat.bs > 1) {ierr = MPI_Type_dup(sf->vscat.unit,&dtype);CHKERRMPI(ierr);} 762 (*newsf)->vscat.bs = sf->vscat.bs; 763 (*newsf)->vscat.unit = dtype; 764 (*newsf)->vscat.to_n = sf->vscat.to_n; 765 (*newsf)->vscat.from_n = sf->vscat.from_n; 766 /* Do not copy lsf. Build it on demand since it is rarely used */ 767 768 #if defined(PETSC_HAVE_DEVICE) 769 (*newsf)->backend = sf->backend; 770 (*newsf)->unknown_input_stream= sf->unknown_input_stream; 771 (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi; 772 (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi; 773 #endif 774 if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);} 775 /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */ 776 PetscFunctionReturn(0); 777 } 778 779 /*@C 780 PetscSFGetGraph - Get the graph specifying a parallel star forest 781 782 Not Collective 783 784 Input Parameter: 785 . sf - star forest 786 787 Output Parameters: 788 + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 789 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 790 . ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage) 791 - iremote - remote locations of root vertices for each leaf on the current process 792 793 Notes: 794 We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet 795 796 Fortran Notes: 797 The returned iremote array is a copy and must be deallocated after use. Consequently, if you 798 want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array. 
799 800 To check for a NULL ilocal use 801 $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then 802 803 Level: intermediate 804 805 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph() 806 @*/ 807 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 808 { 809 PetscErrorCode ierr; 810 811 PetscFunctionBegin; 812 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 813 if (sf->ops->GetGraph) { 814 ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr); 815 } else { 816 if (nroots) *nroots = sf->nroots; 817 if (nleaves) *nleaves = sf->nleaves; 818 if (ilocal) *ilocal = sf->mine; 819 if (iremote) *iremote = sf->remote; 820 } 821 PetscFunctionReturn(0); 822 } 823 824 /*@ 825 PetscSFGetLeafRange - Get the active leaf ranges 826 827 Not Collective 828 829 Input Parameter: 830 . sf - star forest 831 832 Output Parameters: 833 + minleaf - minimum active leaf on this process. Return 0 if there are no leaves. 834 - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves. 835 836 Level: developer 837 838 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph() 839 @*/ 840 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf) 841 { 842 PetscFunctionBegin; 843 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 844 PetscSFCheckGraphSet(sf,1); 845 if (minleaf) *minleaf = sf->minleaf; 846 if (maxleaf) *maxleaf = sf->maxleaf; 847 PetscFunctionReturn(0); 848 } 849 850 /*@C 851 PetscSFViewFromOptions - View from Options 852 853 Collective on PetscSF 854 855 Input Parameters: 856 + A - the star forest 857 . 
 obj - Optional object
-  name - command line option

   Level: intermediate
.seealso:  PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
  /* Thin convenience wrapper; the generic object-level routine handles the option lookup */
  ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFView - view a star forest

   Collective

   Input Parameters:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Setting up ensures the rank/offset tables printed below are populated */
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
      /* Synchronized printing keeps per-rank output in rank order */
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        /* NULL sf->mine means leaves are contiguous, so the leaf index is just i */
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        /* Sort a copy of the ranks (with a permutation) so the detailed listing is in rank order */
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %" PetscInt_FMT " edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  /* Let the implementation append type-specific information */
  if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this
 process

   Not Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
.  roffset - offset in rmine/rremote for each rank (length nranks+1)
.  rmine - concatenated array holding local indices referencing each remote rank
-  rremote - concatenated array holding remote indices referenced for each remote rank

   Level: developer

.seealso: PetscSFGetLeafRanks()
@*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* The rank tables are built during setup, so they do not exist before PetscSFSetUp() */
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetRootRanks) {
    ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
  } else {
    /* The generic implementation */
    if (nranks)  *nranks  = sf->nranks;
    if (ranks)   *ranks   = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine)   *rmine   = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

   Not Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  niranks - number of leaf ranks referencing roots on this process
.  iranks - array of ranks
.  ioffset - offset in irootloc for each rank (length niranks+1)
-  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetLeafRanks) {
    ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
  } else {
    /* Unlike the root-rank query, there is no generic fallback for leaf ranks */
    PetscSFType type;
    ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
  }
  PetscFunctionReturn(0);
}

/* Return PETSC_TRUE if needle occurs in list[0..n); linear scan, intended for short lists */
static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
  PetscInt i;
  for (i=0; i<n; i++) {
    if (needle == list[i]) return PETSC_TRUE;
  }
  return PETSC_FALSE;
}

/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

   Collective

   Input Parameters:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr);
  /* Count leaves per remote rank with a hash table keyed by rank */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank (PetscTable cannot store key 0) */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--;                 /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group   group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate the distinguished group's ranks into the SF communicator's numbering */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRMPI(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRMPI(ierr);}
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished,
     using a two-pointer swap scheme that scans from both ends */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) {      /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition independently so lookups below can use binary search per partition */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;       /* reset; reused below as a per-rank fill cursor */
  }
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit: consecutive leaves often reference the same rank, so reuse irank */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks: first the distinguished partition,
         then the non-distinguished one */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %" PetscInt_FMT " in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks: build an auxiliary SF with one leaf per
       referenced rank, all pointing at root 0; its root degree is the incoming count */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
    /* Enumerate the incoming ranks: each referencing process gathers its own rank to us */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRMPI(ierr);
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* Outgoing ranks are already known locally: they are exactly sf->ranks */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRMPI(ierr);
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Parameter:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Parameter:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.
1211 1212 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering() 1213 @*/ 1214 PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi) 1215 { 1216 PetscErrorCode ierr; 1217 1218 PetscFunctionBegin; 1219 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1220 PetscValidPointer(multi,2); 1221 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */ 1222 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1223 *multi = sf->multi; 1224 sf->multi->multi = sf->multi; 1225 PetscFunctionReturn(0); 1226 } 1227 if (!sf->multi) { 1228 const PetscInt *indegree; 1229 PetscInt i,*inoffset,*outones,*outoffset,maxlocal; 1230 PetscSFNode *remote; 1231 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 1232 ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr); 1233 ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr); 1234 ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr); 1235 inoffset[0] = 0; 1236 for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i]; 1237 for (i=0; i<maxlocal; i++) outones[i] = 1; 1238 ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1239 ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1240 for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1241 if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */ 1242 for (i=0; i<sf->nroots; i++) { 1243 if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp"); 1244 } 1245 } 1246 ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr); 1247 for (i=0; i<sf->nleaves; i++) { 1248 remote[i].rank = sf->remote[i].rank; 1249 remote[i].index = outoffset[sf->mine ? 
sf->mine[i] : i]; 1250 } 1251 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1252 sf->multi->multi = sf->multi; 1253 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1254 if (sf->rankorder) { /* Sort the ranks */ 1255 PetscMPIInt rank; 1256 PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree; 1257 PetscSFNode *newremote; 1258 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 1259 for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]); 1260 ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr); 1261 for (i=0; i<maxlocal; i++) outranks[i] = rank; 1262 ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr); 1263 ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr); 1264 /* Sort the incoming ranks at each vertex, build the inverse map */ 1265 for (i=0; i<sf->nroots; i++) { 1266 PetscInt j; 1267 for (j=0; j<indegree[i]; j++) tmpoffset[j] = j; 1268 ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr); 1269 for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1270 } 1271 ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr); 1272 ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr); 1273 ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr); 1274 for (i=0; i<sf->nleaves; i++) { 1275 newremote[i].rank = sf->remote[i].rank; 1276 newremote[i].index = newoutoffset[sf->mine ? 
sf->mine[i] : i]; 1277 } 1278 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1279 ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr); 1280 } 1281 ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr); 1282 } 1283 *multi = sf->multi; 1284 PetscFunctionReturn(0); 1285 } 1286 1287 /*@C 1288 PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices 1289 1290 Collective 1291 1292 Input Parameters: 1293 + sf - original star forest 1294 . nselected - number of selected roots on this process 1295 - selected - indices of the selected roots on this process 1296 1297 Output Parameter: 1298 . esf - new star forest 1299 1300 Level: advanced 1301 1302 Note: 1303 To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can 1304 be done by calling PetscSFGetGraph(). 1305 1306 .seealso: PetscSFSetGraph(), PetscSFGetGraph() 1307 @*/ 1308 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf) 1309 { 1310 PetscInt i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal; 1311 const PetscInt *ilocal; 1312 signed char *rootdata,*leafdata,*leafmem; 1313 const PetscSFNode *iremote; 1314 PetscSFNode *new_iremote; 1315 MPI_Comm comm; 1316 PetscErrorCode ierr; 1317 1318 PetscFunctionBegin; 1319 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1320 PetscSFCheckGraphSet(sf,1); 1321 if (nselected) PetscValidPointer(selected,3); 1322 PetscValidPointer(esf,4); 1323 1324 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1325 ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1326 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1327 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 1328 1329 if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or 
out of range indices */ 1330 PetscBool dups; 1331 ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr); 1332 if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups"); 1333 for (i=0; i<nselected; i++) 1334 if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")",selected[i],nroots); 1335 } 1336 1337 if (sf->ops->CreateEmbeddedRootSF) { 1338 ierr = (*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf);CHKERRQ(ierr); 1339 } else { 1340 /* A generic version of creating embedded sf */ 1341 ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 1342 maxlocal = maxleaf - minleaf + 1; 1343 ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr); 1344 leafdata = leafmem - minleaf; 1345 /* Tag selected roots and bcast to leaves */ 1346 for (i=0; i<nselected; i++) rootdata[selected[i]] = 1; 1347 ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 1348 ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 1349 1350 /* Build esf with leaves that are still connected */ 1351 esf_nleaves = 0; 1352 for (i=0; i<nleaves; i++) { 1353 j = ilocal ? ilocal[i] : i; 1354 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1355 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1356 */ 1357 esf_nleaves += (leafdata[j] ? 1 : 0); 1358 } 1359 ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr); 1360 ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr); 1361 for (i=n=0; i<nleaves; i++) { 1362 j = ilocal ? 
ilocal[i] : i; 1363 if (leafdata[j]) { 1364 new_ilocal[n] = j; 1365 new_iremote[n].rank = iremote[i].rank; 1366 new_iremote[n].index = iremote[i].index; 1367 ++n; 1368 } 1369 } 1370 ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr); 1371 ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr); 1372 ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1373 ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr); 1374 } 1375 ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1376 PetscFunctionReturn(0); 1377 } 1378 1379 /*@C 1380 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1381 1382 Collective 1383 1384 Input Parameters: 1385 + sf - original star forest 1386 . nselected - number of selected leaves on this process 1387 - selected - indices of the selected leaves on this process 1388 1389 Output Parameter: 1390 . newsf - new star forest 1391 1392 Level: advanced 1393 1394 .seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph() 1395 @*/ 1396 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1397 { 1398 const PetscSFNode *iremote; 1399 PetscSFNode *new_iremote; 1400 const PetscInt *ilocal; 1401 PetscInt i,nroots,*leaves,*new_ilocal; 1402 MPI_Comm comm; 1403 PetscErrorCode ierr; 1404 1405 PetscFunctionBegin; 1406 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1407 PetscSFCheckGraphSet(sf,1); 1408 if (nselected) PetscValidPointer(selected,3); 1409 PetscValidPointer(newsf,4); 1410 1411 /* Uniq selected[] and put results in leaves[] */ 1412 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1413 ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr); 1414 ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr); 1415 ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr); 1416 if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= 
sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")",leaves[0],leaves[nselected-1],sf->nleaves); 1417 1418 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1419 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) { 1420 ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr); 1421 } else { 1422 ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr); 1423 ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr); 1424 ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr); 1425 for (i=0; i<nselected; ++i) { 1426 const PetscInt l = leaves[i]; 1427 new_ilocal[i] = ilocal ? ilocal[l] : l; 1428 new_iremote[i].rank = iremote[l].rank; 1429 new_iremote[i].index = iremote[l].index; 1430 } 1431 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr); 1432 ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1433 } 1434 ierr = PetscFree(leaves);CHKERRQ(ierr); 1435 PetscFunctionReturn(0); 1436 } 1437 1438 /*@C 1439 PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd() 1440 1441 Collective on PetscSF 1442 1443 Input Parameters: 1444 + sf - star forest on which to communicate 1445 . unit - data type associated with each node 1446 . rootdata - buffer to broadcast 1447 - op - operation to use for reduction 1448 1449 Output Parameter: 1450 . leafdata - buffer to be reduced with values from each leaf's respective root 1451 1452 Level: intermediate 1453 1454 Notes: 1455 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1456 are host pointers or device pointers, which may incur a noticable cost. 
If you already knew the info, you should 1457 use PetscSFBcastWithMemTypeBegin() instead. 1458 .seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin() 1459 @*/ 1460 PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1461 { 1462 PetscErrorCode ierr; 1463 PetscMemType rootmtype,leafmtype; 1464 1465 PetscFunctionBegin; 1466 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1467 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1468 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);} 1469 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1470 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1471 ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 1472 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);} 1473 PetscFunctionReturn(0); 1474 } 1475 1476 /*@C 1477 PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd() 1478 1479 Collective on PetscSF 1480 1481 Input Parameters: 1482 + sf - star forest on which to communicate 1483 . unit - data type associated with each node 1484 . rootmtype - memory type of rootdata 1485 . rootdata - buffer to broadcast 1486 . leafmtype - memory type of leafdata 1487 - op - operation to use for reduction 1488 1489 Output Parameter: 1490 . 
leafdata - buffer to be reduced with values from each leaf's respective root 1491 1492 Level: intermediate 1493 1494 .seealso: PetscSFBcastEnd(), PetscSFBcastBegin() 1495 @*/ 1496 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 1497 { 1498 PetscErrorCode ierr; 1499 1500 PetscFunctionBegin; 1501 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1502 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1503 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);} 1504 ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 1505 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);} 1506 PetscFunctionReturn(0); 1507 } 1508 1509 /*@C 1510 PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin() 1511 1512 Collective 1513 1514 Input Parameters: 1515 + sf - star forest 1516 . unit - data type 1517 . rootdata - buffer to broadcast 1518 - op - operation to use for reduction 1519 1520 Output Parameter: 1521 . 
leafdata - buffer to be reduced with values from each leaf's respective root 1522 1523 Level: intermediate 1524 1525 .seealso: PetscSFSetGraph(), PetscSFReduceEnd() 1526 @*/ 1527 PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1528 { 1529 PetscErrorCode ierr; 1530 1531 PetscFunctionBegin; 1532 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1533 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);} 1534 ierr = (*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr); 1535 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);} 1536 PetscFunctionReturn(0); 1537 } 1538 1539 /*@C 1540 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 1541 1542 Collective 1543 1544 Input Parameters: 1545 + sf - star forest 1546 . unit - data type 1547 . leafdata - values to reduce 1548 - op - reduction operation 1549 1550 Output Parameter: 1551 . rootdata - result of reduction of values from all leaves of each root 1552 1553 Level: intermediate 1554 1555 Notes: 1556 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1557 are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1558 use PetscSFReduceWithMemTypeBegin() instead. 
1559 1560 .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin() 1561 @*/ 1562 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1563 { 1564 PetscErrorCode ierr; 1565 PetscMemType rootmtype,leafmtype; 1566 1567 PetscFunctionBegin; 1568 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1569 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1570 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);} 1571 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1572 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1573 ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr); 1574 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);} 1575 PetscFunctionReturn(0); 1576 } 1577 1578 /*@C 1579 PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd() 1580 1581 Collective 1582 1583 Input Parameters: 1584 + sf - star forest 1585 . unit - data type 1586 . leafmtype - memory type of leafdata 1587 . leafdata - values to reduce 1588 . rootmtype - memory type of rootdata 1589 - op - reduction operation 1590 1591 Output Parameter: 1592 . 
rootdata - result of reduction of values from all leaves of each root 1593 1594 Level: intermediate 1595 1596 .seealso: PetscSFBcastBegin(), PetscSFReduceBegin() 1597 @*/ 1598 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 1599 { 1600 PetscErrorCode ierr; 1601 1602 PetscFunctionBegin; 1603 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1604 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1605 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);} 1606 ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr); 1607 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);} 1608 PetscFunctionReturn(0); 1609 } 1610 1611 /*@C 1612 PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin() 1613 1614 Collective 1615 1616 Input Parameters: 1617 + sf - star forest 1618 . unit - data type 1619 . leafdata - values to reduce 1620 - op - reduction operation 1621 1622 Output Parameter: 1623 . 
rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode err;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Skip SF-level logging when a VecScatter built on this SF logs the operation itself */
  if (!sf->vscat.logging) {err = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(err);}
  /* Complete the communication initiated by the matching PetscSFReduceBegin() */
  err = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(err);
  if (!sf->vscat.logging) {err = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(err);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
   integers.
1663 1664 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph() 1665 @*/ 1666 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1667 { 1668 PetscErrorCode ierr; 1669 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1670 1671 PetscFunctionBegin; 1672 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1673 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1674 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1675 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1676 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1677 ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr); 1678 if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1679 ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr); 1680 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1681 PetscFunctionReturn(0); 1682 } 1683 1684 /*@C 1685 PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value 1686 1687 Collective 1688 1689 Input Parameters: 1690 + sf - star forest 1691 . unit - data type 1692 . 
leafdata - leaf values to use in reduction 1693 - op - operation to use for reduction 1694 1695 Output Parameters: 1696 + rootdata - root values to be updated, input state is seen by first process to perform an update 1697 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1698 1699 Level: advanced 1700 1701 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph() 1702 @*/ 1703 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1704 { 1705 PetscErrorCode ierr; 1706 1707 PetscFunctionBegin; 1708 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1709 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1710 ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr); 1711 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1712 PetscFunctionReturn(0); 1713 } 1714 1715 /*@C 1716 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd() 1717 1718 Collective 1719 1720 Input Parameter: 1721 . sf - star forest 1722 1723 Output Parameter: 1724 . degree - degree of each root vertex 1725 1726 Level: advanced 1727 1728 Notes: 1729 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 
1730 1731 .seealso: PetscSFGatherBegin() 1732 @*/ 1733 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree) 1734 { 1735 PetscErrorCode ierr; 1736 1737 PetscFunctionBegin; 1738 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1739 PetscSFCheckGraphSet(sf,1); 1740 PetscValidPointer(degree,2); 1741 if (!sf->degreeknown) { 1742 PetscInt i, nroots = sf->nroots, maxlocal; 1743 if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1744 maxlocal = sf->maxleaf-sf->minleaf+1; 1745 ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr); 1746 ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1747 for (i=0; i<nroots; i++) sf->degree[i] = 0; 1748 for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1; 1749 ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr); 1750 } 1751 *degree = NULL; 1752 PetscFunctionReturn(0); 1753 } 1754 1755 /*@C 1756 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin() 1757 1758 Collective 1759 1760 Input Parameter: 1761 . sf - star forest 1762 1763 Output Parameter: 1764 . degree - degree of each root vertex 1765 1766 Level: developer 1767 1768 Notes: 1769 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 
1770 1771 .seealso: 1772 @*/ 1773 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree) 1774 { 1775 PetscErrorCode ierr; 1776 1777 PetscFunctionBegin; 1778 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1779 PetscSFCheckGraphSet(sf,1); 1780 PetscValidPointer(degree,2); 1781 if (!sf->degreeknown) { 1782 if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 1783 ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr); 1784 ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr); 1785 sf->degreeknown = PETSC_TRUE; 1786 } 1787 *degree = sf->degree; 1788 PetscFunctionReturn(0); 1789 } 1790 1791 /*@C 1792 PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()). 1793 Each multi-root is assigned index of the corresponding original root. 1794 1795 Collective 1796 1797 Input Parameters: 1798 + sf - star forest 1799 - degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd() 1800 1801 Output Parameters: 1802 + nMultiRoots - (optional) number of multi-roots (roots of multi-SF) 1803 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots 1804 1805 Level: developer 1806 1807 Notes: 1808 The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed. 
1809 1810 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF() 1811 @*/ 1812 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1813 { 1814 PetscSF msf; 1815 PetscInt i, j, k, nroots, nmroots; 1816 PetscErrorCode ierr; 1817 1818 PetscFunctionBegin; 1819 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1820 ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 1821 if (nroots) PetscValidIntPointer(degree,2); 1822 if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3); 1823 PetscValidPointer(multiRootsOrigNumbering,4); 1824 ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr); 1825 ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr); 1826 ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr); 1827 for (i=0,j=0,k=0; i<nroots; i++) { 1828 if (!degree[i]) continue; 1829 for (j=0; j<degree[i]; j++,k++) { 1830 (*multiRootsOrigNumbering)[k] = i; 1831 } 1832 } 1833 if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail"); 1834 if (nMultiRoots) *nMultiRoots = nmroots; 1835 PetscFunctionReturn(0); 1836 } 1837 1838 /*@C 1839 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 1840 1841 Collective 1842 1843 Input Parameters: 1844 + sf - star forest 1845 . unit - data type 1846 - leafdata - leaf data to gather to roots 1847 1848 Output Parameter: 1849 . 
multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1850 1851 Level: intermediate 1852 1853 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1854 @*/ 1855 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1856 { 1857 PetscErrorCode ierr; 1858 PetscSF multi = NULL; 1859 1860 PetscFunctionBegin; 1861 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1862 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1863 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1864 ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr); 1865 PetscFunctionReturn(0); 1866 } 1867 1868 /*@C 1869 PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin() 1870 1871 Collective 1872 1873 Input Parameters: 1874 + sf - star forest 1875 . unit - data type 1876 - leafdata - leaf data to gather to roots 1877 1878 Output Parameter: 1879 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1880 1881 Level: intermediate 1882 1883 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1884 @*/ 1885 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1886 { 1887 PetscErrorCode ierr; 1888 PetscSF multi = NULL; 1889 1890 PetscFunctionBegin; 1891 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1892 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1893 ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr); 1894 PetscFunctionReturn(0); 1895 } 1896 1897 /*@C 1898 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd() 1899 1900 Collective 1901 1902 Input Parameters: 1903 + sf - star forest 1904 . unit - data type 1905 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1906 1907 Output Parameter: 1908 . 
leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* A scatter is a broadcast over the multi-SF: each leaf receives the unit stored in its private root slot */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Parameter:
.
leafdata - leaf data to be update with personal data from each respective root 1939 1940 Level: intermediate 1941 1942 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1943 @*/ 1944 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1945 { 1946 PetscErrorCode ierr; 1947 PetscSF multi = NULL; 1948 1949 PetscFunctionBegin; 1950 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1951 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1952 ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 1953 PetscFunctionReturn(0); 1954 } 1955 1956 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 1957 { 1958 PetscInt i, n, nleaves; 1959 const PetscInt *ilocal = NULL; 1960 PetscHSetI seen; 1961 PetscErrorCode ierr; 1962 1963 PetscFunctionBegin; 1964 if (PetscDefined(USE_DEBUG)) { 1965 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 1966 ierr = PetscHSetICreate(&seen);CHKERRQ(ierr); 1967 for (i = 0; i < nleaves; i++) { 1968 const PetscInt leaf = ilocal ? ilocal[i] : i; 1969 ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr); 1970 } 1971 ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr); 1972 if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique"); 1973 ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr); 1974 } 1975 PetscFunctionReturn(0); 1976 } 1977 1978 /*@ 1979 PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view 1980 1981 Input Parameters: 1982 + sfA - The first PetscSF 1983 - sfB - The second PetscSF 1984 1985 Output Parameters: 1986 . sfBA - The composite SF 1987 1988 Level: developer 1989 1990 Notes: 1991 Currently, the two SFs must be defined on congruent communicators and they must be true star 1992 forests, i.e. the same leaf is not connected with different roots. 
   sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
   Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  if (localPointsA) {
    /* sfA uses an explicit leaf list: scatter its remote roots into an array indexed directly by
       leaf number (= sfB root number); slots with no incoming edge keep rank = -1 as a sentinel */
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* leaf of sfA outside sfB's root space cannot connect */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  /* Push each sfA root's (rank,index) down through sfB so every sfB leaf learns which sfA root (if any) reaches it.
     leafdataB-minleaf is the standard trick to index the compact buffer by actual leaf number. */
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* denseB: sfB's leaves are contiguous AND every leaf received a valid root, so leafdataB can be
     reused as the remote array of the composite SF without compaction */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;       /* contiguous leaves */
    remotePointsBA = leafdataB;  /* ownership of leafdataB passes to the new SF below */
  } else {
    /* Compact: keep only sfB leaves that received a valid (rank,index) pair */
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  /* PETSC_OWN_POINTER: the new SF takes ownership of localPointsBA and remotePointsBA */
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

   Input Parameters:
+  sfA - The first PetscSF
-  sfB - The second PetscSF

   Output Parameters:
.  sfBA - The composite SF.

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e. the same leaf is not connected with different roots.
   Even more, all roots of the
   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
   a Reduce on sfB, on connected roots.

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op            op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool         iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr);
  if (iswin) op = MPI_REPLACE;
#endif

  /* Lay out sfA's remote roots in an array indexed by leaf number over sfB's leaf range;
     untouched slots keep rank = -1 as a sentinel */
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; /* sfA leaf outside sfB's leaf range */
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* localPointsA == NULL means sfA's leaves are contiguous 0..numLeavesA-1 */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Reduce sfA root info from sfB's leaves up to sfB's roots; reorderedRemotePointsA-minleaf
     lets the compact buffer be addressed by actual leaf number */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* Compact in place: keep only sfB roots that received a valid (rank,index) pair */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  /* PETSC_OWN_POINTER: the new SF takes ownership of localPointsBA and remotePointsBA */
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

   Input Parameters:
.  sf - The global PetscSF

   Output Parameters:
.  out - The local PetscSF
 */
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm          comm;
  PetscMPIInt       myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt          i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode       *liremote;
  PetscSF           lsf;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->CreateLocalSF) {
    /* Implementation provides a specialized routine */
    ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
  } else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&myrank);CHKERRMPI(ierr);

    /* Find out local edges and build a local SF */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
    /* First pass: count edges whose root lives on this rank */
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);

    /* Second pass: copy the intra-process edges, remapping the root rank to 0 (self) */
    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(lsf);CHKERRQ(ierr);
    /* PETSC_OWN_POINTER: lsf takes ownership of lilocal and liremote */
    ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
    *out = lsf;
  }
  PetscFunctionReturn(0);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Logged under the regular broadcast event (PETSCSF_BcastBegin brackets the whole operation) */
  ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  if (sf->ops->BcastToZero) {
    ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}