1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/hashseti.h> 3 #include <petsc/private/viewerimpl.h> 4 #include <petscctable.h> 5 6 #if defined(PETSC_HAVE_CUDA) 7 #include <cuda_runtime.h> 8 #endif 9 10 #if defined(PETSC_HAVE_HIP) 11 #include <hip/hip_runtime.h> 12 #endif 13 14 #if defined(PETSC_CLANG_STATIC_ANALYZER) 15 void PetscSFCheckGraphSet(PetscSF,int); 16 #else 17 #if defined(PETSC_USE_DEBUG) 18 # define PetscSFCheckGraphSet(sf,arg) do { \ 19 if (PetscUnlikely(!(sf)->graphset)) \ 20 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \ 21 } while (0) 22 #else 23 # define PetscSFCheckGraphSet(sf,arg) do {} while (0) 24 #endif 25 #endif 26 27 const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL}; 28 29 /*@ 30 PetscSFCreate - create a star forest communication context 31 32 Collective 33 34 Input Parameter: 35 . comm - communicator on which the star forest will operate 36 37 Output Parameter: 38 . sf - new star forest context 39 40 Options Database Keys: 41 + -sf_type basic -Use MPI persistent Isend/Irecv for communication (Default) 42 . -sf_type window -Use MPI-3 one-sided window for communication 43 - -sf_type neighbor -Use MPI-3 neighborhood collectives for communication 44 45 Level: intermediate 46 47 Notes: 48 When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv, 49 MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special 50 SFs are optimized and they have better performance than general SFs. 
51 52 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy() 53 @*/ 54 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf) 55 { 56 PetscErrorCode ierr; 57 PetscSF b; 58 59 PetscFunctionBegin; 60 PetscValidPointer(sf,2); 61 ierr = PetscSFInitializePackage();CHKERRQ(ierr); 62 63 ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr); 64 65 b->nroots = -1; 66 b->nleaves = -1; 67 b->minleaf = PETSC_MAX_INT; 68 b->maxleaf = PETSC_MIN_INT; 69 b->nranks = -1; 70 b->rankorder = PETSC_TRUE; 71 b->ingroup = MPI_GROUP_NULL; 72 b->outgroup = MPI_GROUP_NULL; 73 b->graphset = PETSC_FALSE; 74 #if defined(PETSC_HAVE_DEVICE) 75 b->use_gpu_aware_mpi = use_gpu_aware_mpi; 76 b->use_stream_aware_mpi = PETSC_FALSE; 77 b->unknown_input_stream= PETSC_FALSE; 78 #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 79 b->backend = PETSCSF_BACKEND_KOKKOS; 80 #elif defined(PETSC_HAVE_CUDA) 81 b->backend = PETSCSF_BACKEND_CUDA; 82 #elif defined(PETSC_HAVE_HIP) 83 b->backend = PETSCSF_BACKEND_HIP; 84 #endif 85 86 #if defined(PETSC_HAVE_NVSHMEM) 87 b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */ 88 b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */ 89 ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL);CHKERRQ(ierr); 90 ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL);CHKERRQ(ierr); 91 #endif 92 #endif 93 b->vscat.from_n = -1; 94 b->vscat.to_n = -1; 95 b->vscat.unit = MPIU_SCALAR; 96 *sf = b; 97 PetscFunctionReturn(0); 98 } 99 100 /*@ 101 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 102 103 Collective 104 105 Input Parameter: 106 . 
sf - star forest 107 108 Level: advanced 109 110 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy() 111 @*/ 112 PetscErrorCode PetscSFReset(PetscSF sf) 113 { 114 PetscErrorCode ierr; 115 116 PetscFunctionBegin; 117 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 118 if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);} 119 sf->nroots = -1; 120 sf->nleaves = -1; 121 sf->minleaf = PETSC_MAX_INT; 122 sf->maxleaf = PETSC_MIN_INT; 123 sf->mine = NULL; 124 sf->remote = NULL; 125 sf->graphset = PETSC_FALSE; 126 ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr); 127 ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr); 128 sf->nranks = -1; 129 ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr); 130 sf->degreeknown = PETSC_FALSE; 131 ierr = PetscFree(sf->degree);CHKERRQ(ierr); 132 if (sf->ingroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRMPI(ierr);} 133 if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRMPI(ierr);} 134 if (sf->multi) sf->multi->multi = NULL; 135 ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr); 136 ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr); 137 138 #if defined(PETSC_HAVE_DEVICE) 139 for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);CHKERRQ(ierr);} 140 #endif 141 142 sf->setupcalled = PETSC_FALSE; 143 PetscFunctionReturn(0); 144 } 145 146 /*@C 147 PetscSFSetType - Set the PetscSF communication implementation 148 149 Collective on PetscSF 150 151 Input Parameters: 152 + sf - the PetscSF context 153 - type - a known method 154 155 Options Database Key: 156 . 
-sf_type <type> - Sets the method; use -help for a list 157 of available methods (for instance, window, basic, neighbor) 158 159 Notes: 160 See "include/petscsf.h" for available methods (for instance) 161 + PETSCSFWINDOW - MPI-2/3 one-sided 162 - PETSCSFBASIC - basic implementation using MPI-1 two-sided 163 164 Level: intermediate 165 166 .seealso: PetscSFType, PetscSFCreate() 167 @*/ 168 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type) 169 { 170 PetscErrorCode ierr,(*r)(PetscSF); 171 PetscBool match; 172 173 PetscFunctionBegin; 174 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 175 PetscValidCharPointer(type,2); 176 177 ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr); 178 if (match) PetscFunctionReturn(0); 179 180 ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr); 181 if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type); 182 /* Destroy the previous PetscSF implementation context */ 183 if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);} 184 ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr); 185 ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr); 186 ierr = (*r)(sf);CHKERRQ(ierr); 187 PetscFunctionReturn(0); 188 } 189 190 /*@C 191 PetscSFGetType - Get the PetscSF communication implementation 192 193 Not Collective 194 195 Input Parameter: 196 . sf - the PetscSF context 197 198 Output Parameter: 199 . type - the PetscSF type name 200 201 Level: intermediate 202 203 .seealso: PetscSFSetType(), PetscSFCreate() 204 @*/ 205 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 206 { 207 PetscFunctionBegin; 208 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1); 209 PetscValidPointer(type,2); 210 *type = ((PetscObject)sf)->type_name; 211 PetscFunctionReturn(0); 212 } 213 214 /*@C 215 PetscSFDestroy - destroy star forest 216 217 Collective 218 219 Input Parameter: 220 . 
sf - address of star forest 221 222 Level: intermediate 223 224 .seealso: PetscSFCreate(), PetscSFReset() 225 @*/ 226 PetscErrorCode PetscSFDestroy(PetscSF *sf) 227 { 228 PetscErrorCode ierr; 229 230 PetscFunctionBegin; 231 if (!*sf) PetscFunctionReturn(0); 232 PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1); 233 if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);} 234 ierr = PetscSFReset(*sf);CHKERRQ(ierr); 235 if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);} 236 ierr = PetscSFDestroy(&(*sf)->vscat.lsf);CHKERRQ(ierr); 237 if ((*sf)->vscat.bs > 1) {ierr = MPI_Type_free(&(*sf)->vscat.unit);CHKERRMPI(ierr);} 238 ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr); 239 PetscFunctionReturn(0); 240 } 241 242 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 243 { 244 PetscInt i, nleaves; 245 PetscMPIInt size; 246 const PetscInt *ilocal; 247 const PetscSFNode *iremote; 248 PetscErrorCode ierr; 249 250 PetscFunctionBegin; 251 if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0); 252 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 253 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr); 254 for (i = 0; i < nleaves; i++) { 255 const PetscInt rank = iremote[i].rank; 256 const PetscInt remote = iremote[i].index; 257 const PetscInt leaf = ilocal ? 
ilocal[i] : i; 258 if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)",rank,i,size); 259 if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0",remote,i); 260 if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0",leaf,i); 261 } 262 PetscFunctionReturn(0); 263 } 264 265 /*@ 266 PetscSFSetUp - set up communication structures 267 268 Collective 269 270 Input Parameter: 271 . sf - star forest communication object 272 273 Level: beginner 274 275 .seealso: PetscSFSetFromOptions(), PetscSFSetType() 276 @*/ 277 PetscErrorCode PetscSFSetUp(PetscSF sf) 278 { 279 PetscErrorCode ierr; 280 281 PetscFunctionBegin; 282 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 283 PetscSFCheckGraphSet(sf,1); 284 if (sf->setupcalled) PetscFunctionReturn(0); 285 ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 286 ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr); 287 if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} /* Zero all sf->ops */ 288 if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);} 289 #if defined(PETSC_HAVE_CUDA) 290 if (sf->backend == PETSCSF_BACKEND_CUDA) { 291 sf->ops->Malloc = PetscSFMalloc_CUDA; 292 sf->ops->Free = PetscSFFree_CUDA; 293 } 294 #endif 295 #if defined(PETSC_HAVE_HIP) 296 if (sf->backend == PETSCSF_BACKEND_HIP) { 297 sf->ops->Malloc = PetscSFMalloc_HIP; 298 sf->ops->Free = PetscSFFree_HIP; 299 } 300 #endif 301 302 # 303 #if defined(PETSC_HAVE_KOKKOS) 304 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 305 sf->ops->Malloc = PetscSFMalloc_Kokkos; 306 sf->ops->Free = PetscSFFree_Kokkos; 307 } 308 #endif 309 ierr = 
PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 310 sf->setupcalled = PETSC_TRUE; 311 PetscFunctionReturn(0); 312 } 313 314 /*@ 315 PetscSFSetFromOptions - set PetscSF options using the options database 316 317 Logically Collective 318 319 Input Parameter: 320 . sf - star forest 321 322 Options Database Keys: 323 + -sf_type - implementation type, see PetscSFSetType() 324 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 325 . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also 326 use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true). 327 If true, this option only works with -use_gpu_aware_mpi 1. 328 . -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false). 329 If true, this option only works with -use_gpu_aware_mpi 1. 330 331 - -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos. 332 On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices, 333 the only available is kokkos. 334 335 Level: intermediate 336 @*/ 337 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 338 { 339 PetscSFType deft; 340 char type[256]; 341 PetscErrorCode ierr; 342 PetscBool flg; 343 344 PetscFunctionBegin; 345 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 346 ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr); 347 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 348 ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr); 349 ierr = PetscSFSetType(sf,flg ? 
type : deft);CHKERRQ(ierr); 350 ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr); 351 #if defined(PETSC_HAVE_DEVICE) 352 { 353 char backendstr[32] = {0}; 354 PetscBool isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set; 355 /* Change the defaults set in PetscSFCreate() with command line options */ 356 ierr = PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL);CHKERRQ(ierr); 357 ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr); 358 ierr = PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);CHKERRQ(ierr); 359 ierr = PetscStrcasecmp("cuda",backendstr,&isCuda);CHKERRQ(ierr); 360 ierr = PetscStrcasecmp("kokkos",backendstr,&isKokkos);CHKERRQ(ierr); 361 ierr = PetscStrcasecmp("hip",backendstr,&isHip);CHKERRQ(ierr); 362 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 363 if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 364 else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 365 else if (isHip) sf->backend = PETSCSF_BACKEND_HIP; 366 else if (set) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr); 367 #elif defined(PETSC_HAVE_KOKKOS) 368 if (set && !isKokkos) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. 
You can only choose kokkos", backendstr); 369 #endif 370 } 371 #endif 372 if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);} 373 ierr = PetscOptionsEnd();CHKERRQ(ierr); 374 PetscFunctionReturn(0); 375 } 376 377 /*@ 378 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 379 380 Logically Collective 381 382 Input Parameters: 383 + sf - star forest 384 - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic) 385 386 Level: advanced 387 388 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin() 389 @*/ 390 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg) 391 { 392 PetscFunctionBegin; 393 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 394 PetscValidLogicalCollectiveBool(sf,flg,2); 395 if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 396 sf->rankorder = flg; 397 PetscFunctionReturn(0); 398 } 399 400 /*@C 401 PetscSFSetGraph - Set a parallel star forest 402 403 Collective 404 405 Input Parameters: 406 + sf - star forest 407 . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 408 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 409 . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced 410 during setup in debug mode) 411 . localmode - copy mode for ilocal 412 . 
iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 413 during setup in debug mode) 414 - remotemode - copy mode for iremote 415 416 Level: intermediate 417 418 Notes: 419 In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode 420 421 Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they 422 encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not 423 needed 424 425 Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf 426 indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode). 427 428 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph() 429 @*/ 430 PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode) 431 { 432 PetscErrorCode ierr; 433 434 PetscFunctionBegin; 435 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 436 if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4); 437 if (nleaves > 0) PetscValidPointer(iremote,6); 438 if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %" PetscInt_FMT ", cannot be negative",nroots); 439 if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %" PetscInt_FMT ", cannot be negative",nleaves); 440 441 if (sf->nroots >= 0) { /* Reset only if graph already set */ 442 ierr = PetscSFReset(sf);CHKERRQ(ierr); 443 } 444 445 ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 446 447 sf->nroots = nroots; 448 sf->nleaves = nleaves; 449 450 if (nleaves && ilocal) { 451 PetscInt i; 452 PetscInt minleaf = PETSC_MAX_INT; 453 PetscInt maxleaf = PETSC_MIN_INT; 454 int contiguous = 1; 455 for (i=0; i<nleaves; i++) { 456 minleaf = 
PetscMin(minleaf,ilocal[i]); 457 maxleaf = PetscMax(maxleaf,ilocal[i]); 458 contiguous &= (ilocal[i] == i); 459 } 460 sf->minleaf = minleaf; 461 sf->maxleaf = maxleaf; 462 if (contiguous) { 463 if (localmode == PETSC_OWN_POINTER) { 464 ierr = PetscFree(ilocal);CHKERRQ(ierr); 465 } 466 ilocal = NULL; 467 } 468 } else { 469 sf->minleaf = 0; 470 sf->maxleaf = nleaves - 1; 471 } 472 473 if (ilocal) { 474 switch (localmode) { 475 case PETSC_COPY_VALUES: 476 ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr); 477 ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr); 478 sf->mine = sf->mine_alloc; 479 break; 480 case PETSC_OWN_POINTER: 481 sf->mine_alloc = (PetscInt*)ilocal; 482 sf->mine = sf->mine_alloc; 483 break; 484 case PETSC_USE_POINTER: 485 sf->mine_alloc = NULL; 486 sf->mine = (PetscInt*)ilocal; 487 break; 488 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode"); 489 } 490 } 491 492 switch (remotemode) { 493 case PETSC_COPY_VALUES: 494 ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr); 495 ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr); 496 sf->remote = sf->remote_alloc; 497 break; 498 case PETSC_OWN_POINTER: 499 sf->remote_alloc = (PetscSFNode*)iremote; 500 sf->remote = sf->remote_alloc; 501 break; 502 case PETSC_USE_POINTER: 503 sf->remote_alloc = NULL; 504 sf->remote = (PetscSFNode*)iremote; 505 break; 506 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode"); 507 } 508 509 ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 510 sf->graphset = PETSC_TRUE; 511 PetscFunctionReturn(0); 512 } 513 514 /*@ 515 PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern 516 517 Collective 518 519 Input Parameters: 520 + sf - The PetscSF 521 . 
map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL) 522 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL 523 524 Notes: 525 It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map. 526 n and N are local and global sizes of x respectively. 527 528 With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to 529 sequential vectors y on all ranks. 530 531 With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a 532 sequential vector y on rank 0. 533 534 In above cases, entries of x are roots and entries of y are leaves. 535 536 With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine 537 creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 538 of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does 539 not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data 540 items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines. 541 542 In this case, roots and leaves are symmetric. 
543 544 Level: intermediate 545 @*/ 546 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern) 547 { 548 MPI_Comm comm; 549 PetscInt n,N,res[2]; 550 PetscMPIInt rank,size; 551 PetscSFType type; 552 PetscErrorCode ierr; 553 554 PetscFunctionBegin; 555 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 556 if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map,2); 557 ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr); 558 if (PetscUnlikely((pattern < PETSCSF_PATTERN_ALLGATHER) || (pattern > PETSCSF_PATTERN_ALLTOALL))) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %d",pattern); 559 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 560 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 561 562 if (pattern == PETSCSF_PATTERN_ALLTOALL) { 563 type = PETSCSFALLTOALL; 564 ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr); 565 ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr); 566 ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr); 567 ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr); 568 } else { 569 ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr); 570 ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr); 571 res[0] = n; 572 res[1] = -n; 573 /* Check if n are same over all ranks so that we can optimize it */ 574 ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRMPI(ierr); 575 if (res[0] == -res[1]) { /* same n */ 576 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 577 } else { 578 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 579 } 580 ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr); 581 } 582 ierr = PetscSFSetType(sf,type);CHKERRQ(ierr); 583 584 sf->pattern = pattern; 585 sf->mine = NULL; /* Contiguous */ 586 587 /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 588 Also set other easy stuff. 
589 */ 590 if (pattern == PETSCSF_PATTERN_ALLGATHER) { 591 sf->nleaves = N; 592 sf->nroots = n; 593 sf->nranks = size; 594 sf->minleaf = 0; 595 sf->maxleaf = N - 1; 596 } else if (pattern == PETSCSF_PATTERN_GATHER) { 597 sf->nleaves = rank ? 0 : N; 598 sf->nroots = n; 599 sf->nranks = rank ? 0 : size; 600 sf->minleaf = 0; 601 sf->maxleaf = rank ? -1 : N - 1; 602 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 603 sf->nleaves = size; 604 sf->nroots = size; 605 sf->nranks = size; 606 sf->minleaf = 0; 607 sf->maxleaf = size - 1; 608 } 609 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 610 sf->graphset = PETSC_TRUE; 611 PetscFunctionReturn(0); 612 } 613 614 /*@ 615 PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map 616 617 Collective 618 619 Input Parameter: 620 . sf - star forest to invert 621 622 Output Parameter: 623 . isf - inverse of sf 624 625 Level: advanced 626 627 Notes: 628 All roots must have degree 1. 629 630 The local space may be a permutation, but cannot be sparse. 
631 632 .seealso: PetscSFSetGraph() 633 @*/ 634 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf) 635 { 636 PetscErrorCode ierr; 637 PetscMPIInt rank; 638 PetscInt i,nroots,nleaves,maxlocal,count,*newilocal; 639 const PetscInt *ilocal; 640 PetscSFNode *roots,*leaves; 641 642 PetscFunctionBegin; 643 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 644 PetscSFCheckGraphSet(sf,1); 645 PetscValidPointer(isf,2); 646 647 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 648 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 649 650 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 651 ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr); 652 for (i=0; i<maxlocal; i++) { 653 leaves[i].rank = rank; 654 leaves[i].index = i; 655 } 656 for (i=0; i <nroots; i++) { 657 roots[i].rank = -1; 658 roots[i].index = -1; 659 } 660 ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr); 661 ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr); 662 663 /* Check whether our leaves are sparse */ 664 for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++; 665 if (count == nroots) newilocal = NULL; 666 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). 
*/ 667 ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr); 668 for (i=0,count=0; i<nroots; i++) { 669 if (roots[i].rank >= 0) { 670 newilocal[count] = i; 671 roots[count].rank = roots[i].rank; 672 roots[count].index = roots[i].index; 673 count++; 674 } 675 } 676 } 677 678 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr); 679 ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr); 680 ierr = PetscFree2(roots,leaves);CHKERRQ(ierr); 681 PetscFunctionReturn(0); 682 } 683 684 /*@ 685 PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph 686 687 Collective 688 689 Input Parameters: 690 + sf - communication object to duplicate 691 - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption) 692 693 Output Parameter: 694 . newsf - new communication object 695 696 Level: beginner 697 698 .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph() 699 @*/ 700 PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf) 701 { 702 PetscSFType type; 703 PetscErrorCode ierr; 704 MPI_Datatype dtype=MPIU_SCALAR; 705 706 PetscFunctionBegin; 707 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 708 PetscValidLogicalCollectiveEnum(sf,opt,2); 709 PetscValidPointer(newsf,3); 710 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr); 711 ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 712 if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);} 713 if (opt == PETSCSF_DUPLICATE_GRAPH) { 714 PetscSFCheckGraphSet(sf,1); 715 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 716 PetscInt nroots,nleaves; 717 const PetscInt *ilocal; 718 const PetscSFNode *iremote; 719 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 720 ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr); 721 } else { 722 ierr 
= PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr); 723 } 724 } 725 /* Since oldtype is committed, so is newtype, according to MPI */ 726 if (sf->vscat.bs > 1) {ierr = MPI_Type_dup(sf->vscat.unit,&dtype);CHKERRMPI(ierr);} 727 (*newsf)->vscat.bs = sf->vscat.bs; 728 (*newsf)->vscat.unit = dtype; 729 (*newsf)->vscat.to_n = sf->vscat.to_n; 730 (*newsf)->vscat.from_n = sf->vscat.from_n; 731 /* Do not copy lsf. Build it on demand since it is rarely used */ 732 733 #if defined(PETSC_HAVE_DEVICE) 734 (*newsf)->backend = sf->backend; 735 (*newsf)->unknown_input_stream= sf->unknown_input_stream; 736 (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi; 737 (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi; 738 #endif 739 if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);} 740 /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */ 741 PetscFunctionReturn(0); 742 } 743 744 /*@C 745 PetscSFGetGraph - Get the graph specifying a parallel star forest 746 747 Not Collective 748 749 Input Parameter: 750 . sf - star forest 751 752 Output Parameters: 753 + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 754 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 755 . ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage) 756 - iremote - remote locations of root vertices for each leaf on the current process 757 758 Notes: 759 We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet 760 761 Fortran Notes: 762 The returned iremote array is a copy and must be deallocated after use. Consequently, if you 763 want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array. 
764 765 To check for a NULL ilocal use 766 $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then 767 768 Level: intermediate 769 770 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph() 771 @*/ 772 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 773 { 774 PetscErrorCode ierr; 775 776 PetscFunctionBegin; 777 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 778 if (sf->ops->GetGraph) { 779 ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr); 780 } else { 781 if (nroots) *nroots = sf->nroots; 782 if (nleaves) *nleaves = sf->nleaves; 783 if (ilocal) *ilocal = sf->mine; 784 if (iremote) *iremote = sf->remote; 785 } 786 PetscFunctionReturn(0); 787 } 788 789 /*@ 790 PetscSFGetLeafRange - Get the active leaf ranges 791 792 Not Collective 793 794 Input Parameter: 795 . sf - star forest 796 797 Output Parameters: 798 + minleaf - minimum active leaf on this process. Return 0 if there are no leaves. 799 - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves. 800 801 Level: developer 802 803 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph() 804 @*/ 805 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf) 806 { 807 PetscFunctionBegin; 808 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 809 PetscSFCheckGraphSet(sf,1); 810 if (minleaf) *minleaf = sf->minleaf; 811 if (maxleaf) *maxleaf = sf->maxleaf; 812 PetscFunctionReturn(0); 813 } 814 815 /*@C 816 PetscSFViewFromOptions - View from Options 817 818 Collective on PetscSF 819 820 Input Parameters: 821 + A - the star forest 822 . 
obj - Optional object
-  name - command line option

   Level: intermediate
.seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
  /* Thin convenience wrapper; all option processing happens in PetscObjectViewFromOptions() */
  ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFView - view a star forest

   Collective

   Input Parameters:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Setting up ensures rank/offset arrays printed below are populated */
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  /* NOTE(review): reads viewer->format directly here; MATLAB-format output is left to the type-specific View below */
  if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
      /* Synchronized printing keeps per-rank output ordered by rank */
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        /* NULL sf->mine means leaves are the contiguous range 0..nleaves-1 */
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        /* Sort a copy of the rank list so the detailed listing is in ascending rank order */
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %" PetscInt_FMT " edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this
process 911 912 Not Collective 913 914 Input Parameter: 915 . sf - star forest 916 917 Output Parameters: 918 + nranks - number of ranks referenced by local part 919 . ranks - array of ranks 920 . roffset - offset in rmine/rremote for each rank (length nranks+1) 921 . rmine - concatenated array holding local indices referencing each remote rank 922 - rremote - concatenated array holding remote indices referenced for each remote rank 923 924 Level: developer 925 926 .seealso: PetscSFGetLeafRanks() 927 @*/ 928 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote) 929 { 930 PetscErrorCode ierr; 931 932 PetscFunctionBegin; 933 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 934 if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 935 if (sf->ops->GetRootRanks) { 936 ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr); 937 } else { 938 /* The generic implementation */ 939 if (nranks) *nranks = sf->nranks; 940 if (ranks) *ranks = sf->ranks; 941 if (roffset) *roffset = sf->roffset; 942 if (rmine) *rmine = sf->rmine; 943 if (rremote) *rremote = sf->rremote; 944 } 945 PetscFunctionReturn(0); 946 } 947 948 /*@C 949 PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 950 951 Not Collective 952 953 Input Parameter: 954 . sf - star forest 955 956 Output Parameters: 957 + niranks - number of leaf ranks referencing roots on this process 958 . iranks - array of ranks 959 . 
ioffset - offset in irootloc for each rank (length niranks+1) 960 - irootloc - concatenated array holding local indices of roots referenced by each leaf rank 961 962 Level: developer 963 964 .seealso: PetscSFGetRootRanks() 965 @*/ 966 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 967 { 968 PetscErrorCode ierr; 969 970 PetscFunctionBegin; 971 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 972 if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 973 if (sf->ops->GetLeafRanks) { 974 ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr); 975 } else { 976 PetscSFType type; 977 ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 978 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 979 } 980 PetscFunctionReturn(0); 981 } 982 983 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) { 984 PetscInt i; 985 for (i=0; i<n; i++) { 986 if (needle == list[i]) return PETSC_TRUE; 987 } 988 return PETSC_FALSE; 989 } 990 991 /*@C 992 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations. 

   Collective

   Input Parameters:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr);
  /* Count, per remote rank, how many of my leaves reference it */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank (PetscTable cannot store key 0) */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  /* Drain the table; traversal order is arbitrary, ranks[] is sorted below */
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--; /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate dgroup members into ranks of the SF's communicator */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRMPI(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRMPI(ierr);}
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank = ranks[i];
      tmpcount = rcount[i];
      ranks[i] = ranks[sf->ndranks];
      rcount[i] = rcount[sf->ndranks];
      ranks[sf->ndranks] = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort the two partitions independently so each is in ascending rank order */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* Prefix-sum the per-rank counts into roffset[], then reuse rcount[] as a fill cursor */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i] = 0;
  }
  /* Bucket each leaf into its rank's segment of rmine[]/rremote[] (counting-sort style) */
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %" PetscInt_FMT " in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  /* Groups are built lazily and cached on the SF (freed in PetscSFReset/Destroy elsewhere) */
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks: build a one-root SF whose leaves are my outgoing ranks */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
    /* Enumerate the incoming ranks: each process gathers the rank ids of those referencing it */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRMPI(ierr);
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* Outgoing group is simply my referenced ranks, already known from setup */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRMPI(ierr);
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}

/*@
  PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

  Collective

  Input Parameter:
. sf - star forest that may contain roots with 0 or with more than 1 vertex

  Output Parameter:
. multi - star forest with split roots, such that each root has degree exactly 1

  Level: developer

  Notes:

  In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
  directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
  edge, it is a candidate for future optimization that might involve its removal. 

.seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    *multi = sf->multi;
    sf->multi->multi = sf->multi; /* the multi-SF is its own multi-SF */
    PetscFunctionReturn(0);
  }
  /* Build the multi-SF lazily and cache it on sf->multi */
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    /* inoffset[] = prefix sum of root degrees: the slot range each root owns in the multi root space */
    ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Each leaf fetches its root's running offset and bumps it by 1: outoffset[leaf] is that leaf's
       unique slot in the split root space */
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
      for (i=0; i<sf->nroots; i++) {
        if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
      }
    }
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    sf->multi->multi = sf->multi;
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) { /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      /* Collect, at each split root, the rank of the leaf occupying that slot */
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell each leaf its rank-ordered slot and rebuild the graph with those targets */
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? 
sf->mine[i] : i]; 1242 } 1243 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1244 ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr); 1245 } 1246 ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr); 1247 } 1248 *multi = sf->multi; 1249 PetscFunctionReturn(0); 1250 } 1251 1252 /*@C 1253 PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices 1254 1255 Collective 1256 1257 Input Parameters: 1258 + sf - original star forest 1259 . nselected - number of selected roots on this process 1260 - selected - indices of the selected roots on this process 1261 1262 Output Parameter: 1263 . esf - new star forest 1264 1265 Level: advanced 1266 1267 Note: 1268 To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can 1269 be done by calling PetscSFGetGraph(). 1270 1271 .seealso: PetscSFSetGraph(), PetscSFGetGraph() 1272 @*/ 1273 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf) 1274 { 1275 PetscInt i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal; 1276 const PetscInt *ilocal; 1277 signed char *rootdata,*leafdata,*leafmem; 1278 const PetscSFNode *iremote; 1279 PetscSFNode *new_iremote; 1280 MPI_Comm comm; 1281 PetscErrorCode ierr; 1282 1283 PetscFunctionBegin; 1284 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1285 PetscSFCheckGraphSet(sf,1); 1286 if (nselected) PetscValidPointer(selected,3); 1287 PetscValidPointer(esf,4); 1288 1289 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1290 ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1291 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1292 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 1293 1294 if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or 
out of range indices */ 1295 PetscBool dups; 1296 ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr); 1297 if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups"); 1298 for (i=0; i<nselected; i++) 1299 if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")",selected[i],nroots); 1300 } 1301 1302 if (sf->ops->CreateEmbeddedRootSF) { 1303 ierr = (*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf);CHKERRQ(ierr); 1304 } else { 1305 /* A generic version of creating embedded sf */ 1306 ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 1307 maxlocal = maxleaf - minleaf + 1; 1308 ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr); 1309 leafdata = leafmem - minleaf; 1310 /* Tag selected roots and bcast to leaves */ 1311 for (i=0; i<nselected; i++) rootdata[selected[i]] = 1; 1312 ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 1313 ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 1314 1315 /* Build esf with leaves that are still connected */ 1316 esf_nleaves = 0; 1317 for (i=0; i<nleaves; i++) { 1318 j = ilocal ? ilocal[i] : i; 1319 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1320 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1321 */ 1322 esf_nleaves += (leafdata[j] ? 1 : 0); 1323 } 1324 ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr); 1325 ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr); 1326 for (i=n=0; i<nleaves; i++) { 1327 j = ilocal ? 
ilocal[i] : i; 1328 if (leafdata[j]) { 1329 new_ilocal[n] = j; 1330 new_iremote[n].rank = iremote[i].rank; 1331 new_iremote[n].index = iremote[i].index; 1332 ++n; 1333 } 1334 } 1335 ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr); 1336 ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr); 1337 ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1338 ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr); 1339 } 1340 ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1341 PetscFunctionReturn(0); 1342 } 1343 1344 /*@C 1345 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1346 1347 Collective 1348 1349 Input Parameters: 1350 + sf - original star forest 1351 . nselected - number of selected leaves on this process 1352 - selected - indices of the selected leaves on this process 1353 1354 Output Parameter: 1355 . newsf - new star forest 1356 1357 Level: advanced 1358 1359 .seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph() 1360 @*/ 1361 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1362 { 1363 const PetscSFNode *iremote; 1364 PetscSFNode *new_iremote; 1365 const PetscInt *ilocal; 1366 PetscInt i,nroots,*leaves,*new_ilocal; 1367 MPI_Comm comm; 1368 PetscErrorCode ierr; 1369 1370 PetscFunctionBegin; 1371 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1372 PetscSFCheckGraphSet(sf,1); 1373 if (nselected) PetscValidPointer(selected,3); 1374 PetscValidPointer(newsf,4); 1375 1376 /* Uniq selected[] and put results in leaves[] */ 1377 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1378 ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr); 1379 ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr); 1380 ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr); 1381 if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= 
sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")",leaves[0],leaves[nselected-1],sf->nleaves);

  /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
  if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
    ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr);
  } else {
    /* Generic path: copy the selected leaves' edges verbatim into a new SF */
    ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
    ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
    for (i=0; i<nselected; ++i) {
      const PetscInt l = leaves[i];
      /* NULL ilocal means leaves are the contiguous range 0..nleaves-1 */
      new_ilocal[i] = ilocal ? ilocal[l] : l;
      new_iremote[i].rank = iremote[l].rank;
      new_iremote[i].index = iremote[l].index;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  }
  ierr = PetscFree(leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd()

   Collective on PetscSF

   Input Parameters:
+  sf - star forest on which to communicate
.  unit - data type associated with each node
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Parameter:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

   Notes:
   When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
   are host pointers or device pointers, which may incur a noticeable cost. 
If you already knew the info, you should
   use PetscSFBcastWithMemTypeBegin() instead.
.seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin()
@*/
PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rmtype,lmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* SF operations inside VecScatter are logged by the VecScatter events instead */
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
  /* Detect host vs device residency of each buffer, then dispatch to the implementation */
  ierr = PetscGetMemType(leafdata,&lmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rmtype);CHKERRQ(ierr);
  ierr = (*sf->ops->BcastBegin)(sf,unit,rmtype,rootdata,lmtype,leafdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd()

   Collective on PetscSF

   Input Parameters:
+  sf - star forest on which to communicate
.  unit - data type associated with each node
.  rootmtype - memory type of rootdata
.  rootdata - buffer to broadcast
.  leafmtype - memory type of leafdata
-  op - operation to use for reduction

   Output Parameter:
.  
leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFBcastEnd(), PetscSFBcastBegin()
@*/
PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Same as PetscSFBcastBegin() but trusts the caller-supplied memory types, skipping detection */
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
  ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Parameter:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFReduceEnd()
@*/
PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);}
  ierr = (*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Parameter:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

   Notes:
   When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
   are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should
   use PetscSFReduceWithMemTypeBegin() instead. 

.seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
@*/
PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* SF operations inside VecScatter are logged by the VecScatter events instead */
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
  /* Detect host vs device residency of each buffer, then dispatch to the implementation */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafmtype - memory type of leafdata
.  leafdata - values to reduce
.  rootmtype - memory type of rootdata
-  op - reduction operation

   Output Parameter:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
@*/
PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Same as PetscSFReduceBegin() but trusts the caller-supplied memory types, skipping detection */
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
  ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Parameter:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);}
  ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
   integers. 

.seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype,leafupdatemtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Detect where each buffer lives so the type-specific backend can choose the right code path */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
  /* Only a single leaf memory type is forwarded below, so both leaf buffers must agree */
  if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
  ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  rootmtype - memory type of rootdata
.  leafmtype - memory type of leafdata
.  leafdata - leaf values to use in reduction
.  leafupdatemtype - memory type of leafupdate
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note: See PetscSFFetchAndOpBegin() for more details.

.seealso: PetscSFFetchAndOpBegin(), PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscMemType leafupdatemtype,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Unlike PetscSFFetchAndOpBegin(), the caller-supplied memory types are taken at face value
     (no PetscGetMemType() queries); only their mutual compatibility is checked */
  if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
  ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.
   leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* No PetscSFSetUp() and no memtype queries here: the matching Begin call did both */
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf-sf->minleaf+1;
    ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* Summing a 1 from every leaf into its root yields the root's degree.
       degreetmp is shifted by minleaf so it can be indexed by the global leaf number. */
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  *degree = NULL; /* result only becomes available in PetscSFComputeDegreeEnd() */
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFComputeDegreeBegin(), PetscSFGatherEnd()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    sf->degreeknown = PETSC_TRUE; /* cache: subsequent Begin/End pairs return sf->degree without recomputing */
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
   Each multi-root is assigned index of the corresponding original root.

   Collective

   Input Parameters:
+  sf - star forest
-  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

   Output Parameters:
+  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
-  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

   Level: developer

   Notes:
   The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1812 1813 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF() 1814 @*/ 1815 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1816 { 1817 PetscSF msf; 1818 PetscInt i, j, k, nroots, nmroots; 1819 PetscErrorCode ierr; 1820 1821 PetscFunctionBegin; 1822 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1823 ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 1824 if (nroots) PetscValidIntPointer(degree,2); 1825 if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3); 1826 PetscValidPointer(multiRootsOrigNumbering,4); 1827 ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr); 1828 ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr); 1829 ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr); 1830 for (i=0,j=0,k=0; i<nroots; i++) { 1831 if (!degree[i]) continue; 1832 for (j=0; j<degree[i]; j++,k++) { 1833 (*multiRootsOrigNumbering)[k] = i; 1834 } 1835 } 1836 if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail"); 1837 if (nMultiRoots) *nMultiRoots = nmroots; 1838 PetscFunctionReturn(0); 1839 } 1840 1841 /*@C 1842 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 1843 1844 Collective 1845 1846 Input Parameters: 1847 + sf - star forest 1848 . unit - data type 1849 - leafdata - leaf data to gather to roots 1850 1851 Output Parameter: 1852 . 
   multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* A gather is a reduce over the multi-SF; each multi-root slot receives exactly one leaf,
     hence MPI_REPLACE rather than a combining operation */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Parameter:
.  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Parameter:
.
   leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* A scatter is a broadcast over the multi-SF: each leaf gets its personal slot from the multi-root buffer */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Parameter:
.
   leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFGatherEnd(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Debug-only sanity check that no local leaf index appears twice in the SF graph, i.e. the SF is a
   true star forest. In optimized builds (PetscDefined(USE_DEBUG) false) the body is skipped entirely. */
static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
{
  PetscInt       i, n, nleaves;
  const PetscInt *ilocal = NULL;
  PetscHSetI     seen;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) {
    ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
    ierr = PetscHSetICreate(&seen);CHKERRQ(ierr);
    for (i = 0; i < nleaves; i++) {
      const PetscInt leaf = ilocal ? ilocal[i] : i; /* ilocal == NULL means contiguous leaves 0..nleaves-1 */
      ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
    }
    /* Duplicates collapse in the hash set, so a smaller set size exposes repeated leaves */
    ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
    if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
    ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

   Input Parameters:
+  sfA - The first PetscSF
-  sfB - The second PetscSF

   Output Parameters:
.  sfBA - The composite SF

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e. the same leaf is not connected with different roots.

   sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
   Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  if (localPointsA) {
    /* Scatter sfA's remote roots into an array addressed by sfB's root numbering;
       rank = -1 marks sfB roots with no incoming edge from sfA */
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* sfA leaf outside sfB's root space cannot be on a path */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  /* Broadcast, through sfB, the (rank,index) of each sfA root down to sfB's leaves */
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr); /* shift so leafdataB is indexed by global leaf number */
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* denseB: leaves of sfB are contiguous and every one of them is connected, so
     leafdataB can serve directly as the composite remote array */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership of leafdataB passes to sfBA via PETSC_OWN_POINTER below */
  } else {
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue; /* drop unconnected leaves */
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

   Input Parameters:
+  sfA - The first PetscSF
-  sfB - The second PetscSF

   Output Parameters:
.  sfBA - The composite SF.

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e. the same leaf is not connected with different roots.
 Even more, all roots of the
   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
   a Reduce on sfB, on connected roots.

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op            op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool         iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr);
  if (iswin) op = MPI_REPLACE;
#endif

  /* Lay sfA's remote roots out over sfB's leaf range; rank = -1 marks slots with no matching sfA leaf */
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; /* sfA leaf outside sfB's leaf range */
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else { /* contiguous sfA leaves 0..numLeavesA-1 */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  for (i=0; i<numRootsB; i++) { /* rank = -1 marks sfB roots that receive no connection below */
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Reduce through sfB: each sfB root receives the (rank,index) of the sfA root its leaf connects to */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* Compact in place, keeping only the sfB roots that were actually connected */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
. out - The local PetscSF
 */
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm          comm;
  PetscMPIInt       myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt          i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode       *liremote;
  PetscSF           lsf;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->CreateLocalSF) { /* type-specific fast path, when the implementation provides one */
    ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
  } else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&myrank);CHKERRMPI(ierr);

    /* Find out local edges and build a local SF */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
    /* First pass: count intra-process edges so buffers can be sized exactly */
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);

    /* Second pass: copy the intra-process edges, remapping the owner to rank 0 */
    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(lsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
    *out = lsf;
  }
  PetscFunctionReturn(0);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Logged under the regular Bcast event; there is no dedicated BcastToZero event */
  ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  if (sf->ops->BcastToZero) {
    ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}