1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/hashseti.h> 3 #include <petsc/private/viewerimpl.h> 4 #include <petscctable.h> 5 6 #if defined(PETSC_HAVE_CUDA) 7 #include <cuda_runtime.h> 8 #endif 9 10 #if defined(PETSC_HAVE_HIP) 11 #include <hip/hip_runtime.h> 12 #endif 13 14 #if defined(PETSC_USE_DEBUG) 15 # define PetscSFCheckGraphSet(sf,arg) do { \ 16 if (PetscUnlikely(!(sf)->graphset)) \ 17 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \ 18 } while (0) 19 #else 20 # define PetscSFCheckGraphSet(sf,arg) do {} while (0) 21 #endif 22 23 const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL}; 24 25 PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *type) 26 { 27 PetscFunctionBegin; 28 PetscValidPointer(type,2); 29 *type = PETSC_MEMTYPE_HOST; 30 #if defined(PETSC_HAVE_CUDA) 31 if (PetscDeviceInitialized(PETSC_DEVICE_CUDA) && data) { 32 cudaError_t cerr; 33 struct cudaPointerAttributes attr; 34 enum cudaMemoryType mtype; 35 cerr = cudaPointerGetAttributes(&attr,data); /* Do not check error since before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */ 36 cudaGetLastError(); /* Reset the last error */ 37 #if (CUDART_VERSION < 10000) 38 mtype = attr.memoryType; 39 #else 40 mtype = attr.type; 41 #endif 42 if (cerr == cudaSuccess && mtype == cudaMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE; 43 } 44 #endif 45 46 #if defined(PETSC_HAVE_HIP) 47 if (PetscDeviceInitialized(PETSC_DEVICE_HIP) && data) { 48 hipError_t cerr; 49 struct hipPointerAttribute_t attr; 50 enum hipMemoryType mtype; 51 cerr = hipPointerGetAttributes(&attr,data); 52 hipGetLastError(); /* Reset the last error */ 53 mtype = attr.memoryType; 54 if (cerr == hipSuccess && mtype == hipMemoryTypeDevice) *type = 
PETSC_MEMTYPE_DEVICE; 55 } 56 #endif 57 PetscFunctionReturn(0); 58 } 59 60 /*@ 61 PetscSFCreate - create a star forest communication context 62 63 Collective 64 65 Input Parameter: 66 . comm - communicator on which the star forest will operate 67 68 Output Parameter: 69 . sf - new star forest context 70 71 Options Database Keys: 72 + -sf_type basic -Use MPI persistent Isend/Irecv for communication (Default) 73 . -sf_type window -Use MPI-3 one-sided window for communication 74 - -sf_type neighbor -Use MPI-3 neighborhood collectives for communication 75 76 Level: intermediate 77 78 Notes: 79 When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv, 80 MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special 81 SFs are optimized and they have better performance than general SFs. 82 83 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy() 84 @*/ 85 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf) 86 { 87 PetscErrorCode ierr; 88 PetscSF b; 89 90 PetscFunctionBegin; 91 PetscValidPointer(sf,2); 92 ierr = PetscSFInitializePackage();CHKERRQ(ierr); 93 94 ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr); 95 96 b->nroots = -1; 97 b->nleaves = -1; 98 b->minleaf = PETSC_MAX_INT; 99 b->maxleaf = PETSC_MIN_INT; 100 b->nranks = -1; 101 b->rankorder = PETSC_TRUE; 102 b->ingroup = MPI_GROUP_NULL; 103 b->outgroup = MPI_GROUP_NULL; 104 b->graphset = PETSC_FALSE; 105 #if defined(PETSC_HAVE_DEVICE) 106 b->use_gpu_aware_mpi = use_gpu_aware_mpi; 107 b->use_stream_aware_mpi = PETSC_FALSE; 108 b->unknown_input_stream= PETSC_FALSE; 109 #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 110 b->backend = PETSCSF_BACKEND_KOKKOS; 111 #elif defined(PETSC_HAVE_CUDA) 112 b->backend = PETSCSF_BACKEND_CUDA; 113 #elif defined(PETSC_HAVE_HIP) 114 b->backend = PETSCSF_BACKEND_HIP; 115 #endif 
116 117 #if defined(PETSC_HAVE_NVSHMEM) 118 b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */ 119 b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */ 120 ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL);CHKERRQ(ierr); 121 ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL);CHKERRQ(ierr); 122 #endif 123 #endif 124 b->vscat.from_n = -1; 125 b->vscat.to_n = -1; 126 b->vscat.unit = MPIU_SCALAR; 127 *sf = b; 128 PetscFunctionReturn(0); 129 } 130 131 /*@ 132 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 133 134 Collective 135 136 Input Parameter: 137 . sf - star forest 138 139 Level: advanced 140 141 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy() 142 @*/ 143 PetscErrorCode PetscSFReset(PetscSF sf) 144 { 145 PetscErrorCode ierr; 146 147 PetscFunctionBegin; 148 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 149 if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);} 150 sf->nroots = -1; 151 sf->nleaves = -1; 152 sf->minleaf = PETSC_MAX_INT; 153 sf->maxleaf = PETSC_MIN_INT; 154 sf->mine = NULL; 155 sf->remote = NULL; 156 sf->graphset = PETSC_FALSE; 157 ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr); 158 ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr); 159 sf->nranks = -1; 160 ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr); 161 sf->degreeknown = PETSC_FALSE; 162 ierr = PetscFree(sf->degree);CHKERRQ(ierr); 163 if (sf->ingroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRMPI(ierr);} 164 if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRMPI(ierr);} 165 if (sf->multi) sf->multi->multi = NULL; 166 ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr); 167 ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr); 168 169 #if defined(PETSC_HAVE_DEVICE) 170 for (PetscInt i=0; i<2; i++) {ierr = 
PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);CHKERRQ(ierr);} 171 #endif 172 173 sf->setupcalled = PETSC_FALSE; 174 PetscFunctionReturn(0); 175 } 176 177 /*@C 178 PetscSFSetType - Set the PetscSF communication implementation 179 180 Collective on PetscSF 181 182 Input Parameters: 183 + sf - the PetscSF context 184 - type - a known method 185 186 Options Database Key: 187 . -sf_type <type> - Sets the method; use -help for a list 188 of available methods (for instance, window, basic, neighbor) 189 190 Notes: 191 See "include/petscsf.h" for available methods (for instance) 192 + PETSCSFWINDOW - MPI-2/3 one-sided 193 - PETSCSFBASIC - basic implementation using MPI-1 two-sided 194 195 Level: intermediate 196 197 .seealso: PetscSFType, PetscSFCreate() 198 @*/ 199 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type) 200 { 201 PetscErrorCode ierr,(*r)(PetscSF); 202 PetscBool match; 203 204 PetscFunctionBegin; 205 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 206 PetscValidCharPointer(type,2); 207 208 ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr); 209 if (match) PetscFunctionReturn(0); 210 211 ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr); 212 if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type); 213 /* Destroy the previous PetscSF implementation context */ 214 if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);} 215 ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr); 216 ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr); 217 ierr = (*r)(sf);CHKERRQ(ierr); 218 PetscFunctionReturn(0); 219 } 220 221 /*@C 222 PetscSFGetType - Get the PetscSF communication implementation 223 224 Not Collective 225 226 Input Parameter: 227 . sf - the PetscSF context 228 229 Output Parameter: 230 . 
type - the PetscSF type name 231 232 Level: intermediate 233 234 .seealso: PetscSFSetType(), PetscSFCreate() 235 @*/ 236 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 237 { 238 PetscFunctionBegin; 239 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1); 240 PetscValidPointer(type,2); 241 *type = ((PetscObject)sf)->type_name; 242 PetscFunctionReturn(0); 243 } 244 245 /*@C 246 PetscSFDestroy - destroy star forest 247 248 Collective 249 250 Input Parameter: 251 . sf - address of star forest 252 253 Level: intermediate 254 255 .seealso: PetscSFCreate(), PetscSFReset() 256 @*/ 257 PetscErrorCode PetscSFDestroy(PetscSF *sf) 258 { 259 PetscErrorCode ierr; 260 261 PetscFunctionBegin; 262 if (!*sf) PetscFunctionReturn(0); 263 PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1); 264 if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);} 265 ierr = PetscSFReset(*sf);CHKERRQ(ierr); 266 if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);} 267 ierr = PetscSFDestroy(&(*sf)->vscat.lsf);CHKERRQ(ierr); 268 if ((*sf)->vscat.bs > 1) {ierr = MPI_Type_free(&(*sf)->vscat.unit);CHKERRMPI(ierr);} 269 ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr); 270 PetscFunctionReturn(0); 271 } 272 273 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 274 { 275 PetscInt i, nleaves; 276 PetscMPIInt size; 277 const PetscInt *ilocal; 278 const PetscSFNode *iremote; 279 PetscErrorCode ierr; 280 281 PetscFunctionBegin; 282 if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0); 283 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 284 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr); 285 for (i = 0; i < nleaves; i++) { 286 const PetscInt rank = iremote[i].rank; 287 const PetscInt remote = iremote[i].index; 288 const PetscInt leaf = ilocal ? 
ilocal[i] : i; 289 if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size); 290 if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i); 291 if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i); 292 } 293 PetscFunctionReturn(0); 294 } 295 296 /*@ 297 PetscSFSetUp - set up communication structures 298 299 Collective 300 301 Input Parameter: 302 . sf - star forest communication object 303 304 Level: beginner 305 306 .seealso: PetscSFSetFromOptions(), PetscSFSetType() 307 @*/ 308 PetscErrorCode PetscSFSetUp(PetscSF sf) 309 { 310 PetscErrorCode ierr; 311 312 PetscFunctionBegin; 313 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 314 PetscSFCheckGraphSet(sf,1); 315 if (sf->setupcalled) PetscFunctionReturn(0); 316 ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 317 ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr); 318 if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} /* Zero all sf->ops */ 319 if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);} 320 #if defined(PETSC_HAVE_CUDA) 321 if (sf->backend == PETSCSF_BACKEND_CUDA) { 322 sf->ops->Malloc = PetscSFMalloc_CUDA; 323 sf->ops->Free = PetscSFFree_CUDA; 324 } 325 #endif 326 #if defined(PETSC_HAVE_HIP) 327 if (sf->backend == PETSCSF_BACKEND_HIP) { 328 sf->ops->Malloc = PetscSFMalloc_HIP; 329 sf->ops->Free = PetscSFFree_HIP; 330 } 331 #endif 332 333 # 334 #if defined(PETSC_HAVE_KOKKOS) 335 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 336 sf->ops->Malloc = PetscSFMalloc_Kokkos; 337 sf->ops->Free = PetscSFFree_Kokkos; 338 } 339 #endif 340 ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 341 sf->setupcalled = PETSC_TRUE; 342 PetscFunctionReturn(0); 343 } 344 345 /*@ 346 
PetscSFSetFromOptions - set PetscSF options using the options database 347 348 Logically Collective 349 350 Input Parameter: 351 . sf - star forest 352 353 Options Database Keys: 354 + -sf_type - implementation type, see PetscSFSetType() 355 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 356 . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also 357 use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true). 358 If true, this option only works with -use_gpu_aware_mpi 1. 359 . -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false). 360 If true, this option only works with -use_gpu_aware_mpi 1. 361 362 - -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos. 363 On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices, 364 the only available is kokkos. 365 366 Level: intermediate 367 @*/ 368 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 369 { 370 PetscSFType deft; 371 char type[256]; 372 PetscErrorCode ierr; 373 PetscBool flg; 374 375 PetscFunctionBegin; 376 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 377 ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr); 378 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 379 ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr); 380 ierr = PetscSFSetType(sf,flg ? 
type : deft);CHKERRQ(ierr); 381 ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr); 382 #if defined(PETSC_HAVE_DEVICE) 383 { 384 char backendstr[32] = {0}; 385 PetscBool isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set; 386 /* Change the defaults set in PetscSFCreate() with command line options */ 387 ierr = PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL);CHKERRQ(ierr); 388 ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr); 389 ierr = PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);CHKERRQ(ierr); 390 ierr = PetscStrcasecmp("cuda",backendstr,&isCuda);CHKERRQ(ierr); 391 ierr = PetscStrcasecmp("kokkos",backendstr,&isKokkos);CHKERRQ(ierr); 392 ierr = PetscStrcasecmp("hip",backendstr,&isHip);CHKERRQ(ierr); 393 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 394 if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 395 else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 396 else if (isHip) sf->backend = PETSCSF_BACKEND_HIP; 397 else if (set) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr); 398 #elif defined(PETSC_HAVE_KOKKOS) 399 if (set && !isKokkos) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. 
You can only choose kokkos", backendstr); 400 #endif 401 } 402 #endif 403 if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);} 404 ierr = PetscOptionsEnd();CHKERRQ(ierr); 405 PetscFunctionReturn(0); 406 } 407 408 /*@ 409 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 410 411 Logically Collective 412 413 Input Parameters: 414 + sf - star forest 415 - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic) 416 417 Level: advanced 418 419 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin() 420 @*/ 421 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg) 422 { 423 PetscFunctionBegin; 424 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 425 PetscValidLogicalCollectiveBool(sf,flg,2); 426 if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 427 sf->rankorder = flg; 428 PetscFunctionReturn(0); 429 } 430 431 /*@ 432 PetscSFSetGraph - Set a parallel star forest 433 434 Collective 435 436 Input Parameters: 437 + sf - star forest 438 . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 439 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 440 . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced 441 during setup in debug mode) 442 . localmode - copy mode for ilocal 443 . 
iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 444 during setup in debug mode) 445 - remotemode - copy mode for iremote 446 447 Level: intermediate 448 449 Notes: 450 In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode 451 452 Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they 453 encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not 454 needed 455 456 Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf 457 indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode). 458 459 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph() 460 @*/ 461 PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode) 462 { 463 PetscErrorCode ierr; 464 465 PetscFunctionBegin; 466 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 467 if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4); 468 if (nleaves > 0) PetscValidPointer(iremote,6); 469 if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots); 470 if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves); 471 472 if (sf->nroots >= 0) { /* Reset only if graph already set */ 473 ierr = PetscSFReset(sf);CHKERRQ(ierr); 474 } 475 476 ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 477 478 sf->nroots = nroots; 479 sf->nleaves = nleaves; 480 481 if (nleaves && ilocal) { 482 PetscInt i; 483 PetscInt minleaf = PETSC_MAX_INT; 484 PetscInt maxleaf = PETSC_MIN_INT; 485 int contiguous = 1; 486 for (i=0; i<nleaves; i++) { 487 minleaf = PetscMin(minleaf,ilocal[i]); 488 maxleaf = 
PetscMax(maxleaf,ilocal[i]); 489 contiguous &= (ilocal[i] == i); 490 } 491 sf->minleaf = minleaf; 492 sf->maxleaf = maxleaf; 493 if (contiguous) { 494 if (localmode == PETSC_OWN_POINTER) { 495 ierr = PetscFree(ilocal);CHKERRQ(ierr); 496 } 497 ilocal = NULL; 498 } 499 } else { 500 sf->minleaf = 0; 501 sf->maxleaf = nleaves - 1; 502 } 503 504 if (ilocal) { 505 switch (localmode) { 506 case PETSC_COPY_VALUES: 507 ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr); 508 ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr); 509 sf->mine = sf->mine_alloc; 510 break; 511 case PETSC_OWN_POINTER: 512 sf->mine_alloc = (PetscInt*)ilocal; 513 sf->mine = sf->mine_alloc; 514 break; 515 case PETSC_USE_POINTER: 516 sf->mine_alloc = NULL; 517 sf->mine = (PetscInt*)ilocal; 518 break; 519 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode"); 520 } 521 } 522 523 switch (remotemode) { 524 case PETSC_COPY_VALUES: 525 ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr); 526 ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr); 527 sf->remote = sf->remote_alloc; 528 break; 529 case PETSC_OWN_POINTER: 530 sf->remote_alloc = (PetscSFNode*)iremote; 531 sf->remote = sf->remote_alloc; 532 break; 533 case PETSC_USE_POINTER: 534 sf->remote_alloc = NULL; 535 sf->remote = (PetscSFNode*)iremote; 536 break; 537 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode"); 538 } 539 540 ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 541 sf->graphset = PETSC_TRUE; 542 PetscFunctionReturn(0); 543 } 544 545 /*@ 546 PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern 547 548 Collective 549 550 Input Parameters: 551 + sf - The PetscSF 552 . 
map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL) 553 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL 554 555 Notes: 556 It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map. 557 n and N are local and global sizes of x respectively. 558 559 With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to 560 sequential vectors y on all ranks. 561 562 With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a 563 sequential vector y on rank 0. 564 565 In above cases, entries of x are roots and entries of y are leaves. 566 567 With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine 568 creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 569 of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does 570 not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data 571 items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines. 572 573 In this case, roots and leaves are symmetric. 
574 575 Level: intermediate 576 @*/ 577 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern) 578 { 579 MPI_Comm comm; 580 PetscInt n,N,res[2]; 581 PetscMPIInt rank,size; 582 PetscSFType type; 583 PetscErrorCode ierr; 584 585 PetscFunctionBegin; 586 ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr); 587 if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern); 588 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 589 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 590 591 if (pattern == PETSCSF_PATTERN_ALLTOALL) { 592 type = PETSCSFALLTOALL; 593 ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr); 594 ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr); 595 ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr); 596 ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr); 597 } else { 598 ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr); 599 ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr); 600 res[0] = n; 601 res[1] = -n; 602 /* Check if n are same over all ranks so that we can optimize it */ 603 ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRMPI(ierr); 604 if (res[0] == -res[1]) { /* same n */ 605 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 606 } else { 607 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 608 } 609 ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr); 610 } 611 ierr = PetscSFSetType(sf,type);CHKERRQ(ierr); 612 613 sf->pattern = pattern; 614 sf->mine = NULL; /* Contiguous */ 615 616 /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 617 Also set other easy stuff. 
618 */ 619 if (pattern == PETSCSF_PATTERN_ALLGATHER) { 620 sf->nleaves = N; 621 sf->nroots = n; 622 sf->nranks = size; 623 sf->minleaf = 0; 624 sf->maxleaf = N - 1; 625 } else if (pattern == PETSCSF_PATTERN_GATHER) { 626 sf->nleaves = rank ? 0 : N; 627 sf->nroots = n; 628 sf->nranks = rank ? 0 : size; 629 sf->minleaf = 0; 630 sf->maxleaf = rank ? -1 : N - 1; 631 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 632 sf->nleaves = size; 633 sf->nroots = size; 634 sf->nranks = size; 635 sf->minleaf = 0; 636 sf->maxleaf = size - 1; 637 } 638 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 639 sf->graphset = PETSC_TRUE; 640 PetscFunctionReturn(0); 641 } 642 643 /*@ 644 PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map 645 646 Collective 647 648 Input Parameter: 649 . sf - star forest to invert 650 651 Output Parameter: 652 . isf - inverse of sf 653 654 Level: advanced 655 656 Notes: 657 All roots must have degree 1. 658 659 The local space may be a permutation, but cannot be sparse. 
660 661 .seealso: PetscSFSetGraph() 662 @*/ 663 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf) 664 { 665 PetscErrorCode ierr; 666 PetscMPIInt rank; 667 PetscInt i,nroots,nleaves,maxlocal,count,*newilocal; 668 const PetscInt *ilocal; 669 PetscSFNode *roots,*leaves; 670 671 PetscFunctionBegin; 672 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 673 PetscSFCheckGraphSet(sf,1); 674 PetscValidPointer(isf,2); 675 676 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 677 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 678 679 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 680 ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr); 681 for (i=0; i<maxlocal; i++) { 682 leaves[i].rank = rank; 683 leaves[i].index = i; 684 } 685 for (i=0; i <nroots; i++) { 686 roots[i].rank = -1; 687 roots[i].index = -1; 688 } 689 ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr); 690 ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr); 691 692 /* Check whether our leaves are sparse */ 693 for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++; 694 if (count == nroots) newilocal = NULL; 695 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). 
*/ 696 ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr); 697 for (i=0,count=0; i<nroots; i++) { 698 if (roots[i].rank >= 0) { 699 newilocal[count] = i; 700 roots[count].rank = roots[i].rank; 701 roots[count].index = roots[i].index; 702 count++; 703 } 704 } 705 } 706 707 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr); 708 ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr); 709 ierr = PetscFree2(roots,leaves);CHKERRQ(ierr); 710 PetscFunctionReturn(0); 711 } 712 713 /*@ 714 PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph 715 716 Collective 717 718 Input Parameters: 719 + sf - communication object to duplicate 720 - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption) 721 722 Output Parameter: 723 . newsf - new communication object 724 725 Level: beginner 726 727 .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph() 728 @*/ 729 PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf) 730 { 731 PetscSFType type; 732 PetscErrorCode ierr; 733 MPI_Datatype dtype=MPIU_SCALAR; 734 735 PetscFunctionBegin; 736 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 737 PetscValidLogicalCollectiveEnum(sf,opt,2); 738 PetscValidPointer(newsf,3); 739 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr); 740 ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 741 if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);} 742 if (opt == PETSCSF_DUPLICATE_GRAPH) { 743 PetscSFCheckGraphSet(sf,1); 744 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 745 PetscInt nroots,nleaves; 746 const PetscInt *ilocal; 747 const PetscSFNode *iremote; 748 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 749 ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr); 750 } else { 751 ierr 
= PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr); 752 } 753 } 754 /* Since oldtype is committed, so is newtype, according to MPI */ 755 if (sf->vscat.bs > 1) {ierr = MPI_Type_dup(sf->vscat.unit,&dtype);CHKERRMPI(ierr);} 756 (*newsf)->vscat.bs = sf->vscat.bs; 757 (*newsf)->vscat.unit = dtype; 758 (*newsf)->vscat.to_n = sf->vscat.to_n; 759 (*newsf)->vscat.from_n = sf->vscat.from_n; 760 /* Do not copy lsf. Build it on demand since it is rarely used */ 761 762 #if defined(PETSC_HAVE_DEVICE) 763 (*newsf)->backend = sf->backend; 764 (*newsf)->unknown_input_stream= sf->unknown_input_stream; 765 (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi; 766 (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi; 767 #endif 768 if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);} 769 /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */ 770 PetscFunctionReturn(0); 771 } 772 773 /*@C 774 PetscSFGetGraph - Get the graph specifying a parallel star forest 775 776 Not Collective 777 778 Input Parameter: 779 . sf - star forest 780 781 Output Parameters: 782 + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 783 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 784 . ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage) 785 - iremote - remote locations of root vertices for each leaf on the current process 786 787 Notes: 788 We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet 789 790 When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you 791 want to update the graph, you must call PetscSFSetGraph after modifying the iremote array. 
792 793 Level: intermediate 794 795 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph() 796 @*/ 797 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 798 { 799 PetscErrorCode ierr; 800 801 PetscFunctionBegin; 802 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 803 if (sf->ops->GetGraph) { 804 ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr); 805 } else { 806 if (nroots) *nroots = sf->nroots; 807 if (nleaves) *nleaves = sf->nleaves; 808 if (ilocal) *ilocal = sf->mine; 809 if (iremote) *iremote = sf->remote; 810 } 811 PetscFunctionReturn(0); 812 } 813 814 /*@ 815 PetscSFGetLeafRange - Get the active leaf ranges 816 817 Not Collective 818 819 Input Parameter: 820 . sf - star forest 821 822 Output Parameters: 823 + minleaf - minimum active leaf on this process. Return 0 if there are no leaves. 824 - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves. 825 826 Level: developer 827 828 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph() 829 @*/ 830 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf) 831 { 832 PetscFunctionBegin; 833 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 834 PetscSFCheckGraphSet(sf,1); 835 if (minleaf) *minleaf = sf->minleaf; 836 if (maxleaf) *maxleaf = sf->maxleaf; 837 PetscFunctionReturn(0); 838 } 839 840 /*@C 841 PetscSFViewFromOptions - View from Options 842 843 Collective on PetscSF 844 845 Input Parameters: 846 + A - the star forest 847 . 
obj - Optional object 848 - name - command line option 849 850 Level: intermediate 851 .seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate() 852 @*/ 853 PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[]) 854 { 855 PetscErrorCode ierr; 856 857 PetscFunctionBegin; 858 PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1); 859 ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr); 860 PetscFunctionReturn(0); 861 } 862 863 /*@C 864 PetscSFView - view a star forest 865 866 Collective 867 868 Input Parameters: 869 + sf - star forest 870 - viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD 871 872 Level: beginner 873 874 .seealso: PetscSFCreate(), PetscSFSetGraph() 875 @*/ 876 PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer) 877 { 878 PetscErrorCode ierr; 879 PetscBool iascii; 880 PetscViewerFormat format; 881 882 PetscFunctionBegin; 883 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 884 if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);} 885 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 886 PetscCheckSameComm(sf,1,viewer,2); 887 if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);} 888 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 889 if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) { 890 PetscMPIInt rank; 891 PetscInt ii,i,j; 892 893 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr); 894 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 895 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 896 if (!sf->graphset) { 897 ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr); 898 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 899 PetscFunctionReturn(0); 900 } 901 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 902 ierr = 
PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      /* One line per leaf: local leaf index <- (owner rank, root index) */
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;

        /* Sort a copy of the rank list so output is ordered by rank; perm maps sorted
           positions back to the original slots in sf->ranks/sf->roffset */
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  /* Let the implementation append type-specific information */
  if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Parameter:
.
sf - star forest

   Output Parameters:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
.  roffset - offset in rmine/rremote for each rank (length nranks+1)
.  rmine - concatenated array holding local indices referencing each remote rank
-  rremote - concatenated array holding remote indices referenced for each remote rank

   Level: developer

.seealso: PetscSFGetLeafRanks()
@*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Rank tables are built during PetscSFSetUp(); before that the cached arrays are invalid */
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetRootRanks) {
    ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
  } else {
    /* The generic implementation: hand back the tables built by PetscSFSetUpRanks().
       Every output argument is optional (may be NULL). */
    if (nranks) *nranks = sf->nranks;
    if (ranks) *ranks = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine) *rmine = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

   Not Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  niranks - number of leaf ranks referencing roots on this process
.  iranks - array of ranks
.
ioffset - offset in irootloc for each rank (length niranks+1)
-  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetLeafRanks) {
    ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
  } else {
    /* Unlike root ranks there is no generic fallback: leaf-rank data exists only when the
       implementation maintains it, so report the offending type name in the error */
    PetscSFType type;
    ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
  }
  PetscFunctionReturn(0);
}

/* Linear membership test: is needle among the first n entries of list? */
static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
  PetscInt i;
  for (i=0; i<n; i++) {
    if (needle == list[i]) return PETSC_TRUE;
  }
  return PETSC_FALSE;
}

/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.
   Collective

   Input Parameters:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr);
  /* Count how many leaves reference each remote rank */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank (PetscTable keys must be nonzero) */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--; /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group   group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;

    /* Translate the distinguished group's ranks into this communicator's numbering */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRMPI(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRMPI(ierr);}
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition separately so lookups below can binary-search within a partition */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* Prefix-sum the per-rank counts into roffset[], then reuse rcount[] as a fill cursor */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;
  }
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks: first the distinguished partition,
         then the rest */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  /* Groups are computed lazily and cached on the SF (freed in PetscSFDestroy) */
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks: build an auxiliary SF with one root and one
       leaf per referenced rank, then the root's degree is the incoming-rank count */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
    /* Enumerate the incoming
ranks: each leaf sends its own rank number, gathered at the single root */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRMPI(ierr);
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  /* The outgoing group is simply the (already computed) list of referenced root ranks */
  if (sf->outgroup == MPI_GROUP_NULL) {
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRMPI(ierr);
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Parameter:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Parameter:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.
.seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    *multi = sf->multi;
    sf->multi->multi = sf->multi;   /* a multi-SF is its own multi-SF */
    PetscFunctionReturn(0);
  }
  if (!sf->multi) { /* Build lazily and cache; freed with the SF */
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    /* inoffset[i] = first split-root slot of root i (prefix sum of degrees) */
    ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Fetch-and-add of 1 hands every leaf a distinct slot index at its root */
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
      for (i=0; i<sf->nroots; i++) {
        if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
      }
    }
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    sf->multi->multi = sf->multi;
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) { /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      /* Each leaf reports its owning rank into the split-root slot it was assigned */
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell each leaf its new (rank-sorted) slot at the root */
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ?
sf->mine[i] : i]; 1267 } 1268 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1269 ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr); 1270 } 1271 ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr); 1272 } 1273 *multi = sf->multi; 1274 PetscFunctionReturn(0); 1275 } 1276 1277 /*@C 1278 PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices 1279 1280 Collective 1281 1282 Input Parameters: 1283 + sf - original star forest 1284 . nselected - number of selected roots on this process 1285 - selected - indices of the selected roots on this process 1286 1287 Output Parameter: 1288 . esf - new star forest 1289 1290 Level: advanced 1291 1292 Note: 1293 To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can 1294 be done by calling PetscSFGetGraph(). 1295 1296 .seealso: PetscSFSetGraph(), PetscSFGetGraph() 1297 @*/ 1298 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf) 1299 { 1300 PetscInt i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal; 1301 const PetscInt *ilocal; 1302 signed char *rootdata,*leafdata,*leafmem; 1303 const PetscSFNode *iremote; 1304 PetscSFNode *new_iremote; 1305 MPI_Comm comm; 1306 PetscErrorCode ierr; 1307 1308 PetscFunctionBegin; 1309 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1310 PetscSFCheckGraphSet(sf,1); 1311 if (nselected) PetscValidPointer(selected,3); 1312 PetscValidPointer(esf,4); 1313 1314 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1315 ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1316 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1317 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 1318 1319 if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or 
out of range indices */ 1320 PetscBool dups; 1321 ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr); 1322 if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups"); 1323 for (i=0; i<nselected; i++) 1324 if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %D is out of [0,%D)",selected[i],nroots); 1325 } 1326 1327 if (sf->ops->CreateEmbeddedRootSF) { 1328 ierr = (*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf);CHKERRQ(ierr); 1329 } else { 1330 /* A generic version of creating embedded sf */ 1331 ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 1332 maxlocal = maxleaf - minleaf + 1; 1333 ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr); 1334 leafdata = leafmem - minleaf; 1335 /* Tag selected roots and bcast to leaves */ 1336 for (i=0; i<nselected; i++) rootdata[selected[i]] = 1; 1337 ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 1338 ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 1339 1340 /* Build esf with leaves that are still connected */ 1341 esf_nleaves = 0; 1342 for (i=0; i<nleaves; i++) { 1343 j = ilocal ? ilocal[i] : i; 1344 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1345 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1346 */ 1347 esf_nleaves += (leafdata[j] ? 1 : 0); 1348 } 1349 ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr); 1350 ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr); 1351 for (i=n=0; i<nleaves; i++) { 1352 j = ilocal ? 
ilocal[i] : i; 1353 if (leafdata[j]) { 1354 new_ilocal[n] = j; 1355 new_iremote[n].rank = iremote[i].rank; 1356 new_iremote[n].index = iremote[i].index; 1357 ++n; 1358 } 1359 } 1360 ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr); 1361 ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr); 1362 ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1363 ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr); 1364 } 1365 ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1366 PetscFunctionReturn(0); 1367 } 1368 1369 /*@C 1370 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1371 1372 Collective 1373 1374 Input Parameters: 1375 + sf - original star forest 1376 . nselected - number of selected leaves on this process 1377 - selected - indices of the selected leaves on this process 1378 1379 Output Parameter: 1380 . newsf - new star forest 1381 1382 Level: advanced 1383 1384 .seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph() 1385 @*/ 1386 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1387 { 1388 const PetscSFNode *iremote; 1389 PetscSFNode *new_iremote; 1390 const PetscInt *ilocal; 1391 PetscInt i,nroots,*leaves,*new_ilocal; 1392 MPI_Comm comm; 1393 PetscErrorCode ierr; 1394 1395 PetscFunctionBegin; 1396 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1397 PetscSFCheckGraphSet(sf,1); 1398 if (nselected) PetscValidPointer(selected,3); 1399 PetscValidPointer(newsf,4); 1400 1401 /* Uniq selected[] and put results in leaves[] */ 1402 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1403 ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr); 1404 ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr); 1405 ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr); 1406 if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= 
sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);

  /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
  if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
    ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr);
  } else {
    ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
    ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
    /* Copy the graph entries of the surviving leaves only, keeping their original indices */
    for (i=0; i<nselected; ++i) {
      const PetscInt l     = leaves[i];
      new_ilocal[i]        = ilocal ? ilocal[l] : l;
      new_iremote[i].rank  = iremote[l].rank;
      new_iremote[i].index = iremote[l].index;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  }
  ierr = PetscFree(leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd()

   Collective on PetscSF

   Input Parameters:
+  sf - star forest on which to communicate
.  unit - data type associated with each node
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Parameter:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

   Notes:
   When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
   are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should
   use PetscSFBcastWithMemTypeBegin() instead.
.seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin()
@*/
PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* vscat.logging suppresses SF-level event logging; NOTE(review): presumably set when
     invoked on behalf of a VecScatter that logs itself -- confirm */
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
  /* Detect host vs device residency of the buffers at runtime; callers that already know
     should use PetscSFBcastWithMemTypeBegin() to skip this query */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd()

   Collective on PetscSF

   Input Parameters:
+  sf - star forest on which to communicate
.  unit - data type associated with each node
.  rootmtype - memory type of rootdata
.  rootdata - buffer to broadcast
.  leafmtype - memory type of leafdata
-  op - operation to use for reduction

   Output Parameter:
.
leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFBcastEnd(), PetscSFBcastBegin()
@*/
PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
  /* Caller-supplied memory types are passed straight through, skipping the runtime pointer query */
  ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Parameter:
.
leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFReduceEnd()
@*/
PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);}
  /* Completes the communication started by the matching PetscSFBcastBegin() */
  ierr = (*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Parameter:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

   Notes:
   When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
   are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should
   use PetscSFReduceWithMemTypeBegin() instead.
.seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
@*/
PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
  /* Detect host vs device residency of the buffers at runtime; callers that already know
     should use PetscSFReduceWithMemTypeBegin() to skip this query */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafmtype - memory type of leafdata
.  leafdata - values to reduce
.  rootmtype - memory type of rootdata
-  op - reduction operation

   Output Parameter:
.
rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
@*/
PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
  /* Caller-supplied memory types are passed straight through, skipping the runtime pointer query */
  ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Parameter:
.
rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);}
  /* Completes the communication started by the matching PetscSFReduceBegin() */
  ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
   integers.
.seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype,leafupdatemtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Determine whether each buffer resides in host or device memory */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
  /* Implementations take a single leaf memory type, so both leaf-side buffers must live in the same space */
  if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
  ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.
leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* No PetscSFSetUp() or memtype queries needed: the matching PetscSFFetchAndOpBegin() did both */
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    /* sf->degree is allocated below and freed only after the reduction completes, so a non-NULL value means a Begin is already in flight */
    if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf-sf->minleaf+1; /* size of the contiguous range spanning all local leaves */
    ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* Summing 1 from every leaf into its root yields each root's degree; the shifted pointer makes
       leaf index minleaf address degreetmp[0] */
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  *degree = NULL; /* result becomes available only after PetscSFComputeDegreeEnd() */
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
.seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    /* degreetmp is allocated by PetscSFComputeDegreeBegin(); its absence means Begin was never called */
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    sf->degreeknown = PETSC_TRUE; /* cache the result; subsequent Begin/End pairs are no-ops */
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
   Each multi-root is assigned index of the corresponding original root.

   Collective

   Input Parameters:
+  sf - star forest
-  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

   Output Parameters:
+  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
-  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

   Level: developer

   Notes:
   The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
.seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
@*/
PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
{
  PetscSF        msf;
  PetscInt       i, j, k, nroots, nmroots;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
  if (nroots) PetscValidIntPointer(degree,2);
  if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
  PetscValidPointer(multiRootsOrigNumbering,4);
  ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr);
  /* Multi-roots are laid out in original-root order: root i contributes degree[i] consecutive multi-roots,
     so each of its slots records the original index i */
  for (i=0,j=0,k=0; i<nroots; i++) {
    if (!degree[i]) continue;
    for (j=0; j<degree[i]; j++,k++) {
      (*multiRootsOrigNumbering)[k] = i;
    }
  }
  /* The total degree must match the number of multi-roots reported by the multi-SF */
  if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
  if (nMultiRoots) *nMultiRoots = nmroots;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Parameter:
.
multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1840 1841 Level: intermediate 1842 1843 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1844 @*/ 1845 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1846 { 1847 PetscErrorCode ierr; 1848 PetscSF multi = NULL; 1849 1850 PetscFunctionBegin; 1851 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1852 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1853 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1854 ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr); 1855 PetscFunctionReturn(0); 1856 } 1857 1858 /*@C 1859 PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin() 1860 1861 Collective 1862 1863 Input Parameters: 1864 + sf - star forest 1865 . unit - data type 1866 - leafdata - leaf data to gather to roots 1867 1868 Output Parameter: 1869 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1870 1871 Level: intermediate 1872 1873 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1874 @*/ 1875 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1876 { 1877 PetscErrorCode ierr; 1878 PetscSF multi = NULL; 1879 1880 PetscFunctionBegin; 1881 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1882 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1883 ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr); 1884 PetscFunctionReturn(0); 1885 } 1886 1887 /*@C 1888 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd() 1889 1890 Collective 1891 1892 Input Parameters: 1893 + sf - star forest 1894 . unit - data type 1895 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1896 1897 Output Parameter: 1898 . 
leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        msf = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* A scatter is a broadcast over the multi-SF: each leaf receives the entry of its personal multi-root slot */
  ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(msf,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Parameter:
.
leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Finish the multi-SF broadcast begun in PetscSFScatterBegin() */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Verify (debug builds only) that no local leaf index appears twice in the SF graph */
static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
{
  PetscInt       i, n, nleaves;
  const PetscInt *ilocal = NULL;
  PetscHSetI     seen;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) {
    ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
    /* Insert every leaf into a hash set; duplicates collapse, so a size mismatch means repeats */
    ierr = PetscHSetICreate(&seen);CHKERRQ(ierr);
    for (i = 0; i < nleaves; i++) {
      const PetscInt leaf = ilocal ? ilocal[i] : i; /* ilocal==NULL means leaves are the contiguous range 0..nleaves-1 */
      ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
    }
    ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
    if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
    ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

   Input Parameters:
+  sfA - The first PetscSF
-  sfB - The second PetscSF

   Output Parameters:
.  sfBA - The composite SF

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e. the same leaf is not connected with different roots.
   sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
   Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  if (localPointsA) {
    /* Scatter sfA's remote points into sfB's root numbering; rank -1 marks sfB roots that no sfA leaf maps to */
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* sfA leaf outside sfB's root space contributes nothing */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  /* Push sfA's (reordered) remote points down through sfB so every sfB leaf learns which sfA root reaches it;
     the -minleaf shift maps sfB's leaf range onto leafdataB[0..] */
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* denseB: all of sfB's leaves are connected and contiguous, so leafdataB can be used as-is */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership passes to sfBA below; do not free */
  } else {
    /* Compact: keep only sfB leaves that are actually reached from some sfA root */
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

   Input Parameters:
+  sfA - The first PetscSF
-  sfB - The second PetscSF

   Output Parameters:
.  sfBA - The composite SF.

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e. the same leaf is not connected with different roots.
Even more, all roots of the
   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
   a Reduce on sfB, on connected roots.

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op            op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool         iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr);
  if (iswin) op = MPI_REPLACE;
#endif

  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  /* Lay out sfA's remote points over sfB's leaf range; rank -1 marks sfB leaves not connected to sfA */
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; /* sfA leaf outside sfB's leaf range */
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* localPointsA==NULL: sfA's leaves are the contiguous range 0..numLeavesA-1 */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Pull sfA's remote points from sfB's leaves up to sfB's roots, effectively inverting sfB */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* Compact in place: keep only sfB roots that received a connection from some sfA root */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
. out - The local PetscSF
 */
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm          comm;
  PetscMPIInt       myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt          i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode       *liremote;
  PetscSF           lsf;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->CreateLocalSF) {
    /* Let the implementation build the local SF if it knows how */
    ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
  } else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&myrank);CHKERRMPI(ierr);

    /* Find out local edges and build a local SF */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
    /* First pass: count edges whose root lives on this rank */
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);

    /* Second pass: copy the intra-process edges, renaming the root rank to 0 on PETSC_COMM_SELF */
    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(lsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
    *out = lsf;
  }
  PetscFunctionReturn(0);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  if (sf->ops->BcastToZero) {
    ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}