1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/hashseti.h> 3 #include <petsc/private/viewerimpl.h> 4 #include <petscctable.h> 5 6 #if defined(PETSC_HAVE_CUDA) 7 #include <cuda_runtime.h> 8 #endif 9 10 #if defined(PETSC_HAVE_HIP) 11 #include <hip/hip_runtime.h> 12 #endif 13 14 #if defined(PETSC_CLANG_STATIC_ANALYZER) 15 void PetscSFCheckGraphSet(PetscSF,int); 16 #else 17 #if defined(PETSC_USE_DEBUG) 18 # define PetscSFCheckGraphSet(sf,arg) do { \ 19 if (PetscUnlikely(!(sf)->graphset)) \ 20 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \ 21 } while (0) 22 #else 23 # define PetscSFCheckGraphSet(sf,arg) do {} while (0) 24 #endif 25 #endif 26 27 const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL}; 28 29 /*@ 30 PetscSFCreate - create a star forest communication context 31 32 Collective 33 34 Input Parameter: 35 . comm - communicator on which the star forest will operate 36 37 Output Parameter: 38 . sf - new star forest context 39 40 Options Database Keys: 41 + -sf_type basic -Use MPI persistent Isend/Irecv for communication (Default) 42 . -sf_type window -Use MPI-3 one-sided window for communication 43 - -sf_type neighbor -Use MPI-3 neighborhood collectives for communication 44 45 Level: intermediate 46 47 Notes: 48 When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv, 49 MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special 50 SFs are optimized and they have better performance than general SFs. 
51 52 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy() 53 @*/ 54 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf) 55 { 56 PetscErrorCode ierr; 57 PetscSF b; 58 59 PetscFunctionBegin; 60 PetscValidPointer(sf,2); 61 ierr = PetscSFInitializePackage();CHKERRQ(ierr); 62 63 ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr); 64 65 b->nroots = -1; 66 b->nleaves = -1; 67 b->minleaf = PETSC_MAX_INT; 68 b->maxleaf = PETSC_MIN_INT; 69 b->nranks = -1; 70 b->rankorder = PETSC_TRUE; 71 b->ingroup = MPI_GROUP_NULL; 72 b->outgroup = MPI_GROUP_NULL; 73 b->graphset = PETSC_FALSE; 74 #if defined(PETSC_HAVE_DEVICE) 75 b->use_gpu_aware_mpi = use_gpu_aware_mpi; 76 b->use_stream_aware_mpi = PETSC_FALSE; 77 b->unknown_input_stream= PETSC_FALSE; 78 #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 79 b->backend = PETSCSF_BACKEND_KOKKOS; 80 #elif defined(PETSC_HAVE_CUDA) 81 b->backend = PETSCSF_BACKEND_CUDA; 82 #elif defined(PETSC_HAVE_HIP) 83 b->backend = PETSCSF_BACKEND_HIP; 84 #endif 85 86 #if defined(PETSC_HAVE_NVSHMEM) 87 b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */ 88 b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */ 89 ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL);CHKERRQ(ierr); 90 ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL);CHKERRQ(ierr); 91 #endif 92 #endif 93 b->vscat.from_n = -1; 94 b->vscat.to_n = -1; 95 b->vscat.unit = MPIU_SCALAR; 96 *sf = b; 97 PetscFunctionReturn(0); 98 } 99 100 /*@ 101 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 102 103 Collective 104 105 Input Parameter: 106 . 
sf - star forest 107 108 Level: advanced 109 110 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy() 111 @*/ 112 PetscErrorCode PetscSFReset(PetscSF sf) 113 { 114 PetscErrorCode ierr; 115 116 PetscFunctionBegin; 117 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 118 if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);} 119 sf->nroots = -1; 120 sf->nleaves = -1; 121 sf->minleaf = PETSC_MAX_INT; 122 sf->maxleaf = PETSC_MIN_INT; 123 sf->mine = NULL; 124 sf->remote = NULL; 125 sf->graphset = PETSC_FALSE; 126 ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr); 127 ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr); 128 sf->nranks = -1; 129 ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr); 130 sf->degreeknown = PETSC_FALSE; 131 ierr = PetscFree(sf->degree);CHKERRQ(ierr); 132 if (sf->ingroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRMPI(ierr);} 133 if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRMPI(ierr);} 134 if (sf->multi) sf->multi->multi = NULL; 135 ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr); 136 ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr); 137 138 #if defined(PETSC_HAVE_DEVICE) 139 for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);CHKERRQ(ierr);} 140 #endif 141 142 sf->setupcalled = PETSC_FALSE; 143 PetscFunctionReturn(0); 144 } 145 146 /*@C 147 PetscSFSetType - Set the PetscSF communication implementation 148 149 Collective on PetscSF 150 151 Input Parameters: 152 + sf - the PetscSF context 153 - type - a known method 154 155 Options Database Key: 156 . 
-sf_type <type> - Sets the method; use -help for a list 157 of available methods (for instance, window, basic, neighbor) 158 159 Notes: 160 See "include/petscsf.h" for available methods (for instance) 161 + PETSCSFWINDOW - MPI-2/3 one-sided 162 - PETSCSFBASIC - basic implementation using MPI-1 two-sided 163 164 Level: intermediate 165 166 .seealso: PetscSFType, PetscSFCreate() 167 @*/ 168 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type) 169 { 170 PetscErrorCode ierr,(*r)(PetscSF); 171 PetscBool match; 172 173 PetscFunctionBegin; 174 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 175 PetscValidCharPointer(type,2); 176 177 ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr); 178 if (match) PetscFunctionReturn(0); 179 180 ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr); 181 PetscCheckFalse(!r,PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type); 182 /* Destroy the previous PetscSF implementation context */ 183 if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);} 184 ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr); 185 ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr); 186 ierr = (*r)(sf);CHKERRQ(ierr); 187 PetscFunctionReturn(0); 188 } 189 190 /*@C 191 PetscSFGetType - Get the PetscSF communication implementation 192 193 Not Collective 194 195 Input Parameter: 196 . sf - the PetscSF context 197 198 Output Parameter: 199 . type - the PetscSF type name 200 201 Level: intermediate 202 203 .seealso: PetscSFSetType(), PetscSFCreate() 204 @*/ 205 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 206 { 207 PetscFunctionBegin; 208 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1); 209 PetscValidPointer(type,2); 210 *type = ((PetscObject)sf)->type_name; 211 PetscFunctionReturn(0); 212 } 213 214 /*@C 215 PetscSFDestroy - destroy star forest 216 217 Collective 218 219 Input Parameter: 220 . 
sf - address of star forest 221 222 Level: intermediate 223 224 .seealso: PetscSFCreate(), PetscSFReset() 225 @*/ 226 PetscErrorCode PetscSFDestroy(PetscSF *sf) 227 { 228 PetscErrorCode ierr; 229 230 PetscFunctionBegin; 231 if (!*sf) PetscFunctionReturn(0); 232 PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1); 233 if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);} 234 ierr = PetscSFReset(*sf);CHKERRQ(ierr); 235 if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);} 236 ierr = PetscSFDestroy(&(*sf)->vscat.lsf);CHKERRQ(ierr); 237 if ((*sf)->vscat.bs > 1) {ierr = MPI_Type_free(&(*sf)->vscat.unit);CHKERRMPI(ierr);} 238 ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr); 239 PetscFunctionReturn(0); 240 } 241 242 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 243 { 244 PetscInt i, nleaves; 245 PetscMPIInt size; 246 const PetscInt *ilocal; 247 const PetscSFNode *iremote; 248 PetscErrorCode ierr; 249 250 PetscFunctionBegin; 251 if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0); 252 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 253 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr); 254 for (i = 0; i < nleaves; i++) { 255 const PetscInt rank = iremote[i].rank; 256 const PetscInt remote = iremote[i].index; 257 const PetscInt leaf = ilocal ? 
ilocal[i] : i; 258 PetscCheckFalse(rank < 0 || rank >= size,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)",rank,i,size); 259 PetscCheckFalse(remote < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0",remote,i); 260 PetscCheckFalse(leaf < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0",leaf,i); 261 } 262 PetscFunctionReturn(0); 263 } 264 265 /*@ 266 PetscSFSetUp - set up communication structures 267 268 Collective 269 270 Input Parameter: 271 . sf - star forest communication object 272 273 Level: beginner 274 275 .seealso: PetscSFSetFromOptions(), PetscSFSetType() 276 @*/ 277 PetscErrorCode PetscSFSetUp(PetscSF sf) 278 { 279 PetscErrorCode ierr; 280 281 PetscFunctionBegin; 282 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 283 PetscSFCheckGraphSet(sf,1); 284 if (sf->setupcalled) PetscFunctionReturn(0); 285 ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 286 ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr); 287 if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} /* Zero all sf->ops */ 288 if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);} 289 #if defined(PETSC_HAVE_CUDA) 290 if (sf->backend == PETSCSF_BACKEND_CUDA) { 291 sf->ops->Malloc = PetscSFMalloc_CUDA; 292 sf->ops->Free = PetscSFFree_CUDA; 293 } 294 #endif 295 #if defined(PETSC_HAVE_HIP) 296 if (sf->backend == PETSCSF_BACKEND_HIP) { 297 sf->ops->Malloc = PetscSFMalloc_HIP; 298 sf->ops->Free = PetscSFFree_HIP; 299 } 300 #endif 301 302 # 303 #if defined(PETSC_HAVE_KOKKOS) 304 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 305 sf->ops->Malloc = PetscSFMalloc_Kokkos; 306 sf->ops->Free = PetscSFFree_Kokkos; 307 } 308 #endif 309 ierr = 
PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 310 sf->setupcalled = PETSC_TRUE; 311 PetscFunctionReturn(0); 312 } 313 314 /*@ 315 PetscSFSetFromOptions - set PetscSF options using the options database 316 317 Logically Collective 318 319 Input Parameter: 320 . sf - star forest 321 322 Options Database Keys: 323 + -sf_type - implementation type, see PetscSFSetType() 324 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 325 . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also 326 use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true). 327 If true, this option only works with -use_gpu_aware_mpi 1. 328 . -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false). 329 If true, this option only works with -use_gpu_aware_mpi 1. 330 331 - -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos. 332 On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices, 333 the only available is kokkos. 334 335 Level: intermediate 336 @*/ 337 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 338 { 339 PetscSFType deft; 340 char type[256]; 341 PetscErrorCode ierr; 342 PetscBool flg; 343 344 PetscFunctionBegin; 345 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 346 ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr); 347 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 348 ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr); 349 ierr = PetscSFSetType(sf,flg ? 
type : deft);CHKERRQ(ierr); 350 ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr); 351 #if defined(PETSC_HAVE_DEVICE) 352 { 353 char backendstr[32] = {0}; 354 PetscBool isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set; 355 /* Change the defaults set in PetscSFCreate() with command line options */ 356 ierr = PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL);CHKERRQ(ierr); 357 ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr); 358 ierr = PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);CHKERRQ(ierr); 359 ierr = PetscStrcasecmp("cuda",backendstr,&isCuda);CHKERRQ(ierr); 360 ierr = PetscStrcasecmp("kokkos",backendstr,&isKokkos);CHKERRQ(ierr); 361 ierr = PetscStrcasecmp("hip",backendstr,&isHip);CHKERRQ(ierr); 362 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 363 if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 364 else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 365 else if (isHip) sf->backend = PETSCSF_BACKEND_HIP; 366 else PetscCheckFalse(set,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr); 367 #elif defined(PETSC_HAVE_KOKKOS) 368 PetscCheckFalse(set && !isKokkos,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. 
You can only choose kokkos", backendstr); 369 #endif 370 } 371 #endif 372 if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);} 373 ierr = PetscOptionsEnd();CHKERRQ(ierr); 374 PetscFunctionReturn(0); 375 } 376 377 /*@ 378 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 379 380 Logically Collective 381 382 Input Parameters: 383 + sf - star forest 384 - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic) 385 386 Level: advanced 387 388 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin() 389 @*/ 390 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg) 391 { 392 PetscFunctionBegin; 393 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 394 PetscValidLogicalCollectiveBool(sf,flg,2); 395 PetscCheckFalse(sf->multi,PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 396 sf->rankorder = flg; 397 PetscFunctionReturn(0); 398 } 399 400 /*@C 401 PetscSFSetGraph - Set a parallel star forest 402 403 Collective 404 405 Input Parameters: 406 + sf - star forest 407 . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 408 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 409 . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced 410 during setup in debug mode) 411 . localmode - copy mode for ilocal 412 . 
iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 413 during setup in debug mode) 414 - remotemode - copy mode for iremote 415 416 Level: intermediate 417 418 Notes: 419 In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode 420 421 This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf 422 indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode). 423 424 Developer Notes: 425 Local indices which are the identity permutation in the range [0,nleaves) are discarded as they 426 encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not 427 needed. 428 429 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph() 430 @*/ 431 PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode) 432 { 433 PetscErrorCode ierr; 434 435 PetscFunctionBegin; 436 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 437 if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4); 438 if (nleaves > 0) PetscValidPointer(iremote,6); 439 PetscCheckFalse(nroots < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %" PetscInt_FMT ", cannot be negative",nroots); 440 PetscCheckFalse(nleaves < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %" PetscInt_FMT ", cannot be negative",nleaves); 441 442 if (sf->nroots >= 0) { /* Reset only if graph already set */ 443 ierr = PetscSFReset(sf);CHKERRQ(ierr); 444 } 445 446 ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 447 448 sf->nroots = nroots; 449 sf->nleaves = nleaves; 450 451 if (nleaves && ilocal) { 452 PetscInt i; 453 PetscInt minleaf = PETSC_MAX_INT; 454 PetscInt maxleaf = PETSC_MIN_INT; 455 int contiguous = 1; 456 for (i=0; i<nleaves; i++) { 457 minleaf = 
PetscMin(minleaf,ilocal[i]); 458 maxleaf = PetscMax(maxleaf,ilocal[i]); 459 contiguous &= (ilocal[i] == i); 460 } 461 sf->minleaf = minleaf; 462 sf->maxleaf = maxleaf; 463 if (contiguous) { 464 if (localmode == PETSC_OWN_POINTER) { 465 ierr = PetscFree(ilocal);CHKERRQ(ierr); 466 } 467 ilocal = NULL; 468 } 469 } else { 470 sf->minleaf = 0; 471 sf->maxleaf = nleaves - 1; 472 } 473 474 if (ilocal) { 475 switch (localmode) { 476 case PETSC_COPY_VALUES: 477 ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr); 478 ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr); 479 sf->mine = sf->mine_alloc; 480 break; 481 case PETSC_OWN_POINTER: 482 sf->mine_alloc = (PetscInt*)ilocal; 483 sf->mine = sf->mine_alloc; 484 break; 485 case PETSC_USE_POINTER: 486 sf->mine_alloc = NULL; 487 sf->mine = (PetscInt*)ilocal; 488 break; 489 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode"); 490 } 491 } 492 493 switch (remotemode) { 494 case PETSC_COPY_VALUES: 495 ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr); 496 ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr); 497 sf->remote = sf->remote_alloc; 498 break; 499 case PETSC_OWN_POINTER: 500 sf->remote_alloc = (PetscSFNode*)iremote; 501 sf->remote = sf->remote_alloc; 502 break; 503 case PETSC_USE_POINTER: 504 sf->remote_alloc = NULL; 505 sf->remote = (PetscSFNode*)iremote; 506 break; 507 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode"); 508 } 509 510 ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 511 sf->graphset = PETSC_TRUE; 512 PetscFunctionReturn(0); 513 } 514 515 /*@ 516 PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern 517 518 Collective 519 520 Input Parameters: 521 + sf - The PetscSF 522 . 
map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL) 523 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL 524 525 Notes: 526 It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map. 527 n and N are local and global sizes of x respectively. 528 529 With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to 530 sequential vectors y on all ranks. 531 532 With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a 533 sequential vector y on rank 0. 534 535 In above cases, entries of x are roots and entries of y are leaves. 536 537 With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine 538 creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 539 of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does 540 not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data 541 items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines. 542 543 In this case, roots and leaves are symmetric. 
544 545 Level: intermediate 546 @*/ 547 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern) 548 { 549 MPI_Comm comm; 550 PetscInt n,N,res[2]; 551 PetscMPIInt rank,size; 552 PetscSFType type; 553 PetscErrorCode ierr; 554 555 PetscFunctionBegin; 556 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 557 if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map,2); 558 ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr); 559 PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL,comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %d",pattern); 560 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 561 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 562 563 if (pattern == PETSCSF_PATTERN_ALLTOALL) { 564 type = PETSCSFALLTOALL; 565 ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr); 566 ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr); 567 ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr); 568 ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr); 569 } else { 570 ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr); 571 ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr); 572 res[0] = n; 573 res[1] = -n; 574 /* Check if n are same over all ranks so that we can optimize it */ 575 ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRMPI(ierr); 576 if (res[0] == -res[1]) { /* same n */ 577 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 578 } else { 579 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 580 } 581 ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr); 582 } 583 ierr = PetscSFSetType(sf,type);CHKERRQ(ierr); 584 585 sf->pattern = pattern; 586 sf->mine = NULL; /* Contiguous */ 587 588 /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 589 Also set other easy stuff. 
590 */ 591 if (pattern == PETSCSF_PATTERN_ALLGATHER) { 592 sf->nleaves = N; 593 sf->nroots = n; 594 sf->nranks = size; 595 sf->minleaf = 0; 596 sf->maxleaf = N - 1; 597 } else if (pattern == PETSCSF_PATTERN_GATHER) { 598 sf->nleaves = rank ? 0 : N; 599 sf->nroots = n; 600 sf->nranks = rank ? 0 : size; 601 sf->minleaf = 0; 602 sf->maxleaf = rank ? -1 : N - 1; 603 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 604 sf->nleaves = size; 605 sf->nroots = size; 606 sf->nranks = size; 607 sf->minleaf = 0; 608 sf->maxleaf = size - 1; 609 } 610 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 611 sf->graphset = PETSC_TRUE; 612 PetscFunctionReturn(0); 613 } 614 615 /*@ 616 PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map 617 618 Collective 619 620 Input Parameter: 621 . sf - star forest to invert 622 623 Output Parameter: 624 . isf - inverse of sf 625 626 Level: advanced 627 628 Notes: 629 All roots must have degree 1. 630 631 The local space may be a permutation, but cannot be sparse. 
632 633 .seealso: PetscSFSetGraph() 634 @*/ 635 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf) 636 { 637 PetscErrorCode ierr; 638 PetscMPIInt rank; 639 PetscInt i,nroots,nleaves,maxlocal,count,*newilocal; 640 const PetscInt *ilocal; 641 PetscSFNode *roots,*leaves; 642 643 PetscFunctionBegin; 644 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 645 PetscSFCheckGraphSet(sf,1); 646 PetscValidPointer(isf,2); 647 648 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 649 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 650 651 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 652 ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr); 653 for (i=0; i<maxlocal; i++) { 654 leaves[i].rank = rank; 655 leaves[i].index = i; 656 } 657 for (i=0; i <nroots; i++) { 658 roots[i].rank = -1; 659 roots[i].index = -1; 660 } 661 ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr); 662 ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr); 663 664 /* Check whether our leaves are sparse */ 665 for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++; 666 if (count == nroots) newilocal = NULL; 667 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). 
*/ 668 ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr); 669 for (i=0,count=0; i<nroots; i++) { 670 if (roots[i].rank >= 0) { 671 newilocal[count] = i; 672 roots[count].rank = roots[i].rank; 673 roots[count].index = roots[i].index; 674 count++; 675 } 676 } 677 } 678 679 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr); 680 ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr); 681 ierr = PetscFree2(roots,leaves);CHKERRQ(ierr); 682 PetscFunctionReturn(0); 683 } 684 685 /*@ 686 PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph 687 688 Collective 689 690 Input Parameters: 691 + sf - communication object to duplicate 692 - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption) 693 694 Output Parameter: 695 . newsf - new communication object 696 697 Level: beginner 698 699 .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph() 700 @*/ 701 PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf) 702 { 703 PetscSFType type; 704 PetscErrorCode ierr; 705 MPI_Datatype dtype=MPIU_SCALAR; 706 707 PetscFunctionBegin; 708 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 709 PetscValidLogicalCollectiveEnum(sf,opt,2); 710 PetscValidPointer(newsf,3); 711 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr); 712 ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 713 if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);} 714 if (opt == PETSCSF_DUPLICATE_GRAPH) { 715 PetscSFCheckGraphSet(sf,1); 716 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 717 PetscInt nroots,nleaves; 718 const PetscInt *ilocal; 719 const PetscSFNode *iremote; 720 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 721 ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr); 722 } else { 723 ierr 
= PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr); 724 } 725 } 726 /* Since oldtype is committed, so is newtype, according to MPI */ 727 if (sf->vscat.bs > 1) {ierr = MPI_Type_dup(sf->vscat.unit,&dtype);CHKERRMPI(ierr);} 728 (*newsf)->vscat.bs = sf->vscat.bs; 729 (*newsf)->vscat.unit = dtype; 730 (*newsf)->vscat.to_n = sf->vscat.to_n; 731 (*newsf)->vscat.from_n = sf->vscat.from_n; 732 /* Do not copy lsf. Build it on demand since it is rarely used */ 733 734 #if defined(PETSC_HAVE_DEVICE) 735 (*newsf)->backend = sf->backend; 736 (*newsf)->unknown_input_stream= sf->unknown_input_stream; 737 (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi; 738 (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi; 739 #endif 740 if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);} 741 /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */ 742 PetscFunctionReturn(0); 743 } 744 745 /*@C 746 PetscSFGetGraph - Get the graph specifying a parallel star forest 747 748 Not Collective 749 750 Input Parameter: 751 . sf - star forest 752 753 Output Parameters: 754 + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 755 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 756 . ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage) 757 - iremote - remote locations of root vertices for each leaf on the current process 758 759 Notes: 760 We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet 761 762 Fortran Notes: 763 The returned iremote array is a copy and must be deallocated after use. Consequently, if you 764 want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array. 
765 766 To check for a NULL ilocal use 767 $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then 768 769 Level: intermediate 770 771 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph() 772 @*/ 773 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 774 { 775 PetscErrorCode ierr; 776 777 PetscFunctionBegin; 778 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 779 if (sf->ops->GetGraph) { 780 ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr); 781 } else { 782 if (nroots) *nroots = sf->nroots; 783 if (nleaves) *nleaves = sf->nleaves; 784 if (ilocal) *ilocal = sf->mine; 785 if (iremote) *iremote = sf->remote; 786 } 787 PetscFunctionReturn(0); 788 } 789 790 /*@ 791 PetscSFGetLeafRange - Get the active leaf ranges 792 793 Not Collective 794 795 Input Parameter: 796 . sf - star forest 797 798 Output Parameters: 799 + minleaf - minimum active leaf on this process. Return 0 if there are no leaves. 800 - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves. 801 802 Level: developer 803 804 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph() 805 @*/ 806 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf) 807 { 808 PetscFunctionBegin; 809 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 810 PetscSFCheckGraphSet(sf,1); 811 if (minleaf) *minleaf = sf->minleaf; 812 if (maxleaf) *maxleaf = sf->maxleaf; 813 PetscFunctionReturn(0); 814 } 815 816 /*@C 817 PetscSFViewFromOptions - View from Options 818 819 Collective on PetscSF 820 821 Input Parameters: 822 + A - the star forest 823 . 
obj - Optional object 824 - name - command line option 825 826 Level: intermediate 827 .seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate() 828 @*/ 829 PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[]) 830 { 831 PetscErrorCode ierr; 832 833 PetscFunctionBegin; 834 PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1); 835 ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr); 836 PetscFunctionReturn(0); 837 } 838 839 /*@C 840 PetscSFView - view a star forest 841 842 Collective 843 844 Input Parameters: 845 + sf - star forest 846 - viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD 847 848 Level: beginner 849 850 .seealso: PetscSFCreate(), PetscSFSetGraph() 851 @*/ 852 PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer) 853 { 854 PetscErrorCode ierr; 855 PetscBool iascii; 856 PetscViewerFormat format; 857 858 PetscFunctionBegin; 859 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 860 if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);} 861 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 862 PetscCheckSameComm(sf,1,viewer,2); 863 if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);} 864 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 865 if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) { 866 PetscMPIInt rank; 867 PetscInt ii,i,j; 868 869 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr); 870 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 871 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 872 if (!sf->graphset) { 873 ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr); 874 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 875 PetscFunctionReturn(0); 876 } 877 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 878 ierr = 
PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 879 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr); 880 for (i=0; i<sf->nleaves; i++) { 881 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr); 882 } 883 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 884 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 885 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 886 PetscMPIInt *tmpranks,*perm; 887 ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr); 888 ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr); 889 for (i=0; i<sf->nranks; i++) perm[i] = i; 890 ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr); 891 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr); 892 for (ii=0; ii<sf->nranks; ii++) { 893 i = perm[ii]; 894 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %" PetscInt_FMT " edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr); 895 for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) { 896 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr); 897 } 898 } 899 ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr); 900 } 901 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 902 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 903 } 904 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 905 } 906 if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);} 907 PetscFunctionReturn(0); 908 } 909 910 /*@C 911 PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this 
process 912 913 Not Collective 914 915 Input Parameter: 916 . sf - star forest 917 918 Output Parameters: 919 + nranks - number of ranks referenced by local part 920 . ranks - array of ranks 921 . roffset - offset in rmine/rremote for each rank (length nranks+1) 922 . rmine - concatenated array holding local indices referencing each remote rank 923 - rremote - concatenated array holding remote indices referenced for each remote rank 924 925 Level: developer 926 927 .seealso: PetscSFGetLeafRanks() 928 @*/ 929 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote) 930 { 931 PetscErrorCode ierr; 932 933 PetscFunctionBegin; 934 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 935 PetscCheckFalse(!sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 936 if (sf->ops->GetRootRanks) { 937 ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr); 938 } else { 939 /* The generic implementation */ 940 if (nranks) *nranks = sf->nranks; 941 if (ranks) *ranks = sf->ranks; 942 if (roffset) *roffset = sf->roffset; 943 if (rmine) *rmine = sf->rmine; 944 if (rremote) *rremote = sf->rremote; 945 } 946 PetscFunctionReturn(0); 947 } 948 949 /*@C 950 PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 951 952 Not Collective 953 954 Input Parameter: 955 . sf - star forest 956 957 Output Parameters: 958 + niranks - number of leaf ranks referencing roots on this process 959 . iranks - array of ranks 960 . 
ioffset - offset in irootloc for each rank (length niranks+1) 961 - irootloc - concatenated array holding local indices of roots referenced by each leaf rank 962 963 Level: developer 964 965 .seealso: PetscSFGetRootRanks() 966 @*/ 967 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 968 { 969 PetscErrorCode ierr; 970 971 PetscFunctionBegin; 972 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 973 PetscCheckFalse(!sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 974 if (sf->ops->GetLeafRanks) { 975 ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr); 976 } else { 977 PetscSFType type; 978 ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 979 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 980 } 981 PetscFunctionReturn(0); 982 } 983 984 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) { 985 PetscInt i; 986 for (i=0; i<n; i++) { 987 if (needle == list[i]) return PETSC_TRUE; 988 } 989 return PETSC_FALSE; 990 } 991 992 /*@C 993 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations. 
994 995 Collective 996 997 Input Parameters: 998 + sf - PetscSF to set up; PetscSFSetGraph() must have been called 999 - dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange) 1000 1001 Level: developer 1002 1003 .seealso: PetscSFGetRootRanks() 1004 @*/ 1005 PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup) 1006 { 1007 PetscErrorCode ierr; 1008 PetscTable table; 1009 PetscTablePosition pos; 1010 PetscMPIInt size,groupsize,*groupranks; 1011 PetscInt *rcount,*ranks; 1012 PetscInt i, irank = -1,orank = -1; 1013 1014 PetscFunctionBegin; 1015 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1016 PetscSFCheckGraphSet(sf,1); 1017 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr); 1018 ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr); 1019 for (i=0; i<sf->nleaves; i++) { 1020 /* Log 1-based rank */ 1021 ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr); 1022 } 1023 ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr); 1024 ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr); 1025 ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr); 1026 ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr); 1027 for (i=0; i<sf->nranks; i++) { 1028 ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr); 1029 ranks[i]--; /* Convert back to 0-based */ 1030 } 1031 ierr = PetscTableDestroy(&table);CHKERRQ(ierr); 1032 1033 /* We expect that dgroup is reliably "small" while nranks could be large */ 1034 { 1035 MPI_Group group = MPI_GROUP_NULL; 1036 PetscMPIInt *dgroupranks; 1037 ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr); 1038 ierr = MPI_Group_size(dgroup,&groupsize);CHKERRMPI(ierr); 1039 ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr); 1040 ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr); 1041 for (i=0; 
i<groupsize; i++) dgroupranks[i] = i; 1042 if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRMPI(ierr);} 1043 ierr = MPI_Group_free(&group);CHKERRMPI(ierr); 1044 ierr = PetscFree(dgroupranks);CHKERRQ(ierr); 1045 } 1046 1047 /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */ 1048 for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) { 1049 for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */ 1050 if (InList(ranks[i],groupsize,groupranks)) break; 1051 } 1052 for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */ 1053 if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break; 1054 } 1055 if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */ 1056 PetscInt tmprank,tmpcount; 1057 1058 tmprank = ranks[i]; 1059 tmpcount = rcount[i]; 1060 ranks[i] = ranks[sf->ndranks]; 1061 rcount[i] = rcount[sf->ndranks]; 1062 ranks[sf->ndranks] = tmprank; 1063 rcount[sf->ndranks] = tmpcount; 1064 sf->ndranks++; 1065 } 1066 } 1067 ierr = PetscFree(groupranks);CHKERRQ(ierr); 1068 ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr); 1069 ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr); 1070 sf->roffset[0] = 0; 1071 for (i=0; i<sf->nranks; i++) { 1072 ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr); 1073 sf->roffset[i+1] = sf->roffset[i] + rcount[i]; 1074 rcount[i] = 0; 1075 } 1076 for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) { 1077 /* short circuit */ 1078 if (orank != sf->remote[i].rank) { 1079 /* Search for index of iremote[i].rank in sf->ranks */ 1080 ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr); 1081 if (irank < 0) { 1082 ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr); 1083 if (irank >= 0) irank += sf->ndranks; 
1084 } 1085 orank = sf->remote[i].rank; 1086 } 1087 PetscCheckFalse(irank < 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %" PetscInt_FMT " in array",sf->remote[i].rank); 1088 sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i; 1089 sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index; 1090 rcount[irank]++; 1091 } 1092 ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr); 1093 PetscFunctionReturn(0); 1094 } 1095 1096 /*@C 1097 PetscSFGetGroups - gets incoming and outgoing process groups 1098 1099 Collective 1100 1101 Input Parameter: 1102 . sf - star forest 1103 1104 Output Parameters: 1105 + incoming - group of origin processes for incoming edges (leaves that reference my roots) 1106 - outgoing - group of destination processes for outgoing edges (roots that I reference) 1107 1108 Level: developer 1109 1110 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow() 1111 @*/ 1112 PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing) 1113 { 1114 PetscErrorCode ierr; 1115 MPI_Group group = MPI_GROUP_NULL; 1116 1117 PetscFunctionBegin; 1118 PetscCheckFalse(sf->nranks < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups"); 1119 if (sf->ingroup == MPI_GROUP_NULL) { 1120 PetscInt i; 1121 const PetscInt *indegree; 1122 PetscMPIInt rank,*outranks,*inranks; 1123 PetscSFNode *remote; 1124 PetscSF bgcount; 1125 1126 /* Compute the number of incoming ranks */ 1127 ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr); 1128 for (i=0; i<sf->nranks; i++) { 1129 remote[i].rank = sf->ranks[i]; 1130 remote[i].index = 0; 1131 } 1132 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr); 1133 ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1134 ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr); 1135 ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr); 1136 /* 
Enumerate the incoming ranks */ 1137 ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr); 1138 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 1139 for (i=0; i<sf->nranks; i++) outranks[i] = rank; 1140 ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr); 1141 ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr); 1142 ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr); 1143 ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRMPI(ierr); 1144 ierr = MPI_Group_free(&group);CHKERRMPI(ierr); 1145 ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr); 1146 ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr); 1147 } 1148 *incoming = sf->ingroup; 1149 1150 if (sf->outgroup == MPI_GROUP_NULL) { 1151 ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr); 1152 ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRMPI(ierr); 1153 ierr = MPI_Group_free(&group);CHKERRMPI(ierr); 1154 } 1155 *outgoing = sf->outgroup; 1156 PetscFunctionReturn(0); 1157 } 1158 1159 /*@ 1160 PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters 1161 1162 Collective 1163 1164 Input Parameter: 1165 . sf - star forest that may contain roots with 0 or with more than 1 vertex 1166 1167 Output Parameter: 1168 . multi - star forest with split roots, such that each root has degree exactly 1 1169 1170 Level: developer 1171 1172 Notes: 1173 1174 In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi 1175 directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming 1176 edge, it is a candidate for future optimization that might involve its removal. 
1177 1178 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering() 1179 @*/ 1180 PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi) 1181 { 1182 PetscErrorCode ierr; 1183 1184 PetscFunctionBegin; 1185 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1186 PetscValidPointer(multi,2); 1187 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */ 1188 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1189 *multi = sf->multi; 1190 sf->multi->multi = sf->multi; 1191 PetscFunctionReturn(0); 1192 } 1193 if (!sf->multi) { 1194 const PetscInt *indegree; 1195 PetscInt i,*inoffset,*outones,*outoffset,maxlocal; 1196 PetscSFNode *remote; 1197 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 1198 ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr); 1199 ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr); 1200 ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr); 1201 inoffset[0] = 0; 1202 for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i]; 1203 for (i=0; i<maxlocal; i++) outones[i] = 1; 1204 ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1205 ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1206 for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1207 if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */ 1208 for (i=0; i<sf->nroots; i++) { 1209 PetscCheckFalse(inoffset[i] + indegree[i] != inoffset[i+1],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp"); 1210 } 1211 } 1212 ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr); 1213 for (i=0; i<sf->nleaves; i++) { 1214 remote[i].rank = sf->remote[i].rank; 1215 remote[i].index = outoffset[sf->mine ? 
sf->mine[i] : i]; 1216 } 1217 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1218 sf->multi->multi = sf->multi; 1219 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1220 if (sf->rankorder) { /* Sort the ranks */ 1221 PetscMPIInt rank; 1222 PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree; 1223 PetscSFNode *newremote; 1224 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 1225 for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]); 1226 ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr); 1227 for (i=0; i<maxlocal; i++) outranks[i] = rank; 1228 ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr); 1229 ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr); 1230 /* Sort the incoming ranks at each vertex, build the inverse map */ 1231 for (i=0; i<sf->nroots; i++) { 1232 PetscInt j; 1233 for (j=0; j<indegree[i]; j++) tmpoffset[j] = j; 1234 ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr); 1235 for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1236 } 1237 ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr); 1238 ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr); 1239 ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr); 1240 for (i=0; i<sf->nleaves; i++) { 1241 newremote[i].rank = sf->remote[i].rank; 1242 newremote[i].index = newoutoffset[sf->mine ? 
sf->mine[i] : i]; 1243 } 1244 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1245 ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr); 1246 } 1247 ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr); 1248 } 1249 *multi = sf->multi; 1250 PetscFunctionReturn(0); 1251 } 1252 1253 /*@C 1254 PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices 1255 1256 Collective 1257 1258 Input Parameters: 1259 + sf - original star forest 1260 . nselected - number of selected roots on this process 1261 - selected - indices of the selected roots on this process 1262 1263 Output Parameter: 1264 . esf - new star forest 1265 1266 Level: advanced 1267 1268 Note: 1269 To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can 1270 be done by calling PetscSFGetGraph(). 1271 1272 .seealso: PetscSFSetGraph(), PetscSFGetGraph() 1273 @*/ 1274 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf) 1275 { 1276 PetscInt i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal; 1277 const PetscInt *ilocal; 1278 signed char *rootdata,*leafdata,*leafmem; 1279 const PetscSFNode *iremote; 1280 PetscSFNode *new_iremote; 1281 MPI_Comm comm; 1282 PetscErrorCode ierr; 1283 1284 PetscFunctionBegin; 1285 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1286 PetscSFCheckGraphSet(sf,1); 1287 if (nselected) PetscValidPointer(selected,3); 1288 PetscValidPointer(esf,4); 1289 1290 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1291 ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1292 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1293 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 1294 1295 if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or 
out of range indices */ 1296 PetscBool dups; 1297 ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr); 1298 PetscCheckFalse(dups,comm,PETSC_ERR_ARG_WRONG,"selected[] has dups"); 1299 for (i=0; i<nselected; i++) 1300 PetscCheckFalse(selected[i] < 0 || selected[i] >= nroots,comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")",selected[i],nroots); 1301 } 1302 1303 if (sf->ops->CreateEmbeddedRootSF) { 1304 ierr = (*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf);CHKERRQ(ierr); 1305 } else { 1306 /* A generic version of creating embedded sf */ 1307 ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 1308 maxlocal = maxleaf - minleaf + 1; 1309 ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr); 1310 leafdata = leafmem - minleaf; 1311 /* Tag selected roots and bcast to leaves */ 1312 for (i=0; i<nselected; i++) rootdata[selected[i]] = 1; 1313 ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 1314 ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 1315 1316 /* Build esf with leaves that are still connected */ 1317 esf_nleaves = 0; 1318 for (i=0; i<nleaves; i++) { 1319 j = ilocal ? ilocal[i] : i; 1320 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1321 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1322 */ 1323 esf_nleaves += (leafdata[j] ? 1 : 0); 1324 } 1325 ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr); 1326 ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr); 1327 for (i=n=0; i<nleaves; i++) { 1328 j = ilocal ? 
ilocal[i] : i; 1329 if (leafdata[j]) { 1330 new_ilocal[n] = j; 1331 new_iremote[n].rank = iremote[i].rank; 1332 new_iremote[n].index = iremote[i].index; 1333 ++n; 1334 } 1335 } 1336 ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr); 1337 ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr); 1338 ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1339 ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr); 1340 } 1341 ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1342 PetscFunctionReturn(0); 1343 } 1344 1345 /*@C 1346 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1347 1348 Collective 1349 1350 Input Parameters: 1351 + sf - original star forest 1352 . nselected - number of selected leaves on this process 1353 - selected - indices of the selected leaves on this process 1354 1355 Output Parameter: 1356 . newsf - new star forest 1357 1358 Level: advanced 1359 1360 .seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph() 1361 @*/ 1362 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1363 { 1364 const PetscSFNode *iremote; 1365 PetscSFNode *new_iremote; 1366 const PetscInt *ilocal; 1367 PetscInt i,nroots,*leaves,*new_ilocal; 1368 MPI_Comm comm; 1369 PetscErrorCode ierr; 1370 1371 PetscFunctionBegin; 1372 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1373 PetscSFCheckGraphSet(sf,1); 1374 if (nselected) PetscValidPointer(selected,3); 1375 PetscValidPointer(newsf,4); 1376 1377 /* Uniq selected[] and put results in leaves[] */ 1378 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1379 ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr); 1380 ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr); 1381 ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr); 1382 PetscCheckFalse(nselected && (leaves[0] < 0 || leaves[nselected-1] 
>= sf->nleaves),comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")",leaves[0],leaves[nselected-1],sf->nleaves); 1383 1384 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1385 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) { 1386 ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr); 1387 } else { 1388 ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr); 1389 ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr); 1390 ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr); 1391 for (i=0; i<nselected; ++i) { 1392 const PetscInt l = leaves[i]; 1393 new_ilocal[i] = ilocal ? ilocal[l] : l; 1394 new_iremote[i].rank = iremote[l].rank; 1395 new_iremote[i].index = iremote[l].index; 1396 } 1397 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr); 1398 ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1399 } 1400 ierr = PetscFree(leaves);CHKERRQ(ierr); 1401 PetscFunctionReturn(0); 1402 } 1403 1404 /*@C 1405 PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd() 1406 1407 Collective on PetscSF 1408 1409 Input Parameters: 1410 + sf - star forest on which to communicate 1411 . unit - data type associated with each node 1412 . rootdata - buffer to broadcast 1413 - op - operation to use for reduction 1414 1415 Output Parameter: 1416 . leafdata - buffer to be reduced with values from each leaf's respective root 1417 1418 Level: intermediate 1419 1420 Notes: 1421 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1422 are host pointers or device pointers, which may incur a noticable cost. 
If you already knew the info, you should 1423 use PetscSFBcastWithMemTypeBegin() instead. 1424 .seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin() 1425 @*/ 1426 PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1427 { 1428 PetscErrorCode ierr; 1429 PetscMemType rootmtype,leafmtype; 1430 1431 PetscFunctionBegin; 1432 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1433 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1434 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);} 1435 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1436 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1437 ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 1438 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);} 1439 PetscFunctionReturn(0); 1440 } 1441 1442 /*@C 1443 PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd() 1444 1445 Collective on PetscSF 1446 1447 Input Parameters: 1448 + sf - star forest on which to communicate 1449 . unit - data type associated with each node 1450 . rootmtype - memory type of rootdata 1451 . rootdata - buffer to broadcast 1452 . leafmtype - memory type of leafdata 1453 - op - operation to use for reduction 1454 1455 Output Parameter: 1456 . 
leafdata - buffer to be reduced with values from each leaf's respective root 1457 1458 Level: intermediate 1459 1460 .seealso: PetscSFBcastEnd(), PetscSFBcastBegin() 1461 @*/ 1462 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 1463 { 1464 PetscErrorCode ierr; 1465 1466 PetscFunctionBegin; 1467 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1468 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1469 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);} 1470 ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 1471 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);} 1472 PetscFunctionReturn(0); 1473 } 1474 1475 /*@C 1476 PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin() 1477 1478 Collective 1479 1480 Input Parameters: 1481 + sf - star forest 1482 . unit - data type 1483 . rootdata - buffer to broadcast 1484 - op - operation to use for reduction 1485 1486 Output Parameter: 1487 . 
leafdata - buffer to be reduced with values from each leaf's respective root 1488 1489 Level: intermediate 1490 1491 .seealso: PetscSFSetGraph(), PetscSFReduceEnd() 1492 @*/ 1493 PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1494 { 1495 PetscErrorCode ierr; 1496 1497 PetscFunctionBegin; 1498 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1499 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);} 1500 ierr = (*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr); 1501 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);} 1502 PetscFunctionReturn(0); 1503 } 1504 1505 /*@C 1506 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 1507 1508 Collective 1509 1510 Input Parameters: 1511 + sf - star forest 1512 . unit - data type 1513 . leafdata - values to reduce 1514 - op - reduction operation 1515 1516 Output Parameter: 1517 . rootdata - result of reduction of values from all leaves of each root 1518 1519 Level: intermediate 1520 1521 Notes: 1522 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1523 are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1524 use PetscSFReduceWithMemTypeBegin() instead. 
1525 1526 .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin() 1527 @*/ 1528 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1529 { 1530 PetscErrorCode ierr; 1531 PetscMemType rootmtype,leafmtype; 1532 1533 PetscFunctionBegin; 1534 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1535 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1536 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);} 1537 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1538 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1539 ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr); 1540 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);} 1541 PetscFunctionReturn(0); 1542 } 1543 1544 /*@C 1545 PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd() 1546 1547 Collective 1548 1549 Input Parameters: 1550 + sf - star forest 1551 . unit - data type 1552 . leafmtype - memory type of leafdata 1553 . leafdata - values to reduce 1554 . rootmtype - memory type of rootdata 1555 - op - reduction operation 1556 1557 Output Parameter: 1558 . 
rootdata - result of reduction of values from all leaves of each root 1559 1560 Level: intermediate 1561 1562 .seealso: PetscSFBcastBegin(), PetscSFReduceBegin() 1563 @*/ 1564 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 1565 { 1566 PetscErrorCode ierr; 1567 1568 PetscFunctionBegin; 1569 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1570 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1571 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);} 1572 ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr); 1573 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);} 1574 PetscFunctionReturn(0); 1575 } 1576 1577 /*@C 1578 PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin() 1579 1580 Collective 1581 1582 Input Parameters: 1583 + sf - star forest 1584 . unit - data type 1585 . leafdata - values to reduce 1586 - op - reduction operation 1587 1588 Output Parameter: 1589 . 
rootdata - result of reduction of values from all leaves of each root 1590 1591 Level: intermediate 1592 1593 .seealso: PetscSFSetGraph(), PetscSFBcastEnd() 1594 @*/ 1595 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1596 { 1597 PetscErrorCode ierr; 1598 1599 PetscFunctionBegin; 1600 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1601 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);} 1602 ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr); 1603 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);} 1604 PetscFunctionReturn(0); 1605 } 1606 1607 /*@C 1608 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd() 1609 1610 Collective 1611 1612 Input Parameters: 1613 + sf - star forest 1614 . unit - data type 1615 . leafdata - leaf values to use in reduction 1616 - op - operation to use for reduction 1617 1618 Output Parameters: 1619 + rootdata - root values to be updated, input state is seen by first process to perform an update 1620 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1621 1622 Level: advanced 1623 1624 Note: 1625 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1626 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1627 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1628 integers. 
1629 1630 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph() 1631 @*/ 1632 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1633 { 1634 PetscErrorCode ierr; 1635 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1636 1637 PetscFunctionBegin; 1638 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1639 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1640 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1641 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1642 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1643 ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr); 1644 PetscCheckFalse(leafmtype != leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1645 ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr); 1646 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1647 PetscFunctionReturn(0); 1648 } 1649 1650 /*@C 1651 PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd() 1652 1653 Collective 1654 1655 Input Parameters: 1656 + sf - star forest 1657 . unit - data type 1658 . rootmtype - memory type of rootdata 1659 . leafmtype - memory type of leafdata 1660 . leafdata - leaf values to use in reduction 1661 . leafupdatemtype - memory type of leafupdate 1662 - op - operation to use for reduction 1663 1664 Output Parameters: 1665 + rootdata - root values to be updated, input state is seen by first process to perform an update 1666 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1667 1668 Level: advanced 1669 1670 Note: See PetscSFFetchAndOpBegin() for more details. 
1671 1672 .seealso: PetscSFFetchAndOpBegin(),PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph() 1673 @*/ 1674 PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscMemType leafupdatemtype,void *leafupdate,MPI_Op op) 1675 { 1676 PetscErrorCode ierr; 1677 1678 PetscFunctionBegin; 1679 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1680 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1681 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1682 PetscCheckFalse(leafmtype != leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1683 ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr); 1684 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1685 PetscFunctionReturn(0); 1686 } 1687 1688 /*@C 1689 PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value 1690 1691 Collective 1692 1693 Input Parameters: 1694 + sf - star forest 1695 . unit - data type 1696 . 
leafdata - leaf values to use in reduction 1697 - op - operation to use for reduction 1698 1699 Output Parameters: 1700 + rootdata - root values to be updated, input state is seen by first process to perform an update 1701 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1702 1703 Level: advanced 1704 1705 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph() 1706 @*/ 1707 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1708 { 1709 PetscErrorCode ierr; 1710 1711 PetscFunctionBegin; 1712 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1713 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1714 ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr); 1715 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1716 PetscFunctionReturn(0); 1717 } 1718 1719 /*@C 1720 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd() 1721 1722 Collective 1723 1724 Input Parameter: 1725 . sf - star forest 1726 1727 Output Parameter: 1728 . degree - degree of each root vertex 1729 1730 Level: advanced 1731 1732 Notes: 1733 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 
1734 1735 .seealso: PetscSFGatherBegin() 1736 @*/ 1737 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree) 1738 { 1739 PetscErrorCode ierr; 1740 1741 PetscFunctionBegin; 1742 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1743 PetscSFCheckGraphSet(sf,1); 1744 PetscValidPointer(degree,2); 1745 if (!sf->degreeknown) { 1746 PetscInt i, nroots = sf->nroots, maxlocal; 1747 PetscCheckFalse(sf->degree,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1748 maxlocal = sf->maxleaf-sf->minleaf+1; 1749 ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr); 1750 ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1751 for (i=0; i<nroots; i++) sf->degree[i] = 0; 1752 for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1; 1753 ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr); 1754 } 1755 *degree = NULL; 1756 PetscFunctionReturn(0); 1757 } 1758 1759 /*@C 1760 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin() 1761 1762 Collective 1763 1764 Input Parameter: 1765 . sf - star forest 1766 1767 Output Parameter: 1768 . degree - degree of each root vertex 1769 1770 Level: developer 1771 1772 Notes: 1773 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 
1774 1775 .seealso: 1776 @*/ 1777 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree) 1778 { 1779 PetscErrorCode ierr; 1780 1781 PetscFunctionBegin; 1782 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1783 PetscSFCheckGraphSet(sf,1); 1784 PetscValidPointer(degree,2); 1785 if (!sf->degreeknown) { 1786 PetscCheckFalse(!sf->degreetmp,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 1787 ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr); 1788 ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr); 1789 sf->degreeknown = PETSC_TRUE; 1790 } 1791 *degree = sf->degree; 1792 PetscFunctionReturn(0); 1793 } 1794 1795 /*@C 1796 PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()). 1797 Each multi-root is assigned index of the corresponding original root. 1798 1799 Collective 1800 1801 Input Parameters: 1802 + sf - star forest 1803 - degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd() 1804 1805 Output Parameters: 1806 + nMultiRoots - (optional) number of multi-roots (roots of multi-SF) 1807 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots 1808 1809 Level: developer 1810 1811 Notes: 1812 The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed. 
1813 1814 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF() 1815 @*/ 1816 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1817 { 1818 PetscSF msf; 1819 PetscInt i, j, k, nroots, nmroots; 1820 PetscErrorCode ierr; 1821 1822 PetscFunctionBegin; 1823 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1824 ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 1825 if (nroots) PetscValidIntPointer(degree,2); 1826 if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3); 1827 PetscValidPointer(multiRootsOrigNumbering,4); 1828 ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr); 1829 ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr); 1830 ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr); 1831 for (i=0,j=0,k=0; i<nroots; i++) { 1832 if (!degree[i]) continue; 1833 for (j=0; j<degree[i]; j++,k++) { 1834 (*multiRootsOrigNumbering)[k] = i; 1835 } 1836 } 1837 PetscCheckFalse(k != nmroots,PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail"); 1838 if (nMultiRoots) *nMultiRoots = nmroots; 1839 PetscFunctionReturn(0); 1840 } 1841 1842 /*@C 1843 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 1844 1845 Collective 1846 1847 Input Parameters: 1848 + sf - star forest 1849 . unit - data type 1850 - leafdata - leaf data to gather to roots 1851 1852 Output Parameter: 1853 . 
multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1854 1855 Level: intermediate 1856 1857 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1858 @*/ 1859 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1860 { 1861 PetscErrorCode ierr; 1862 PetscSF multi = NULL; 1863 1864 PetscFunctionBegin; 1865 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1866 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1867 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1868 ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr); 1869 PetscFunctionReturn(0); 1870 } 1871 1872 /*@C 1873 PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin() 1874 1875 Collective 1876 1877 Input Parameters: 1878 + sf - star forest 1879 . unit - data type 1880 - leafdata - leaf data to gather to roots 1881 1882 Output Parameter: 1883 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1884 1885 Level: intermediate 1886 1887 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1888 @*/ 1889 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1890 { 1891 PetscErrorCode ierr; 1892 PetscSF multi = NULL; 1893 1894 PetscFunctionBegin; 1895 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1896 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1897 ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr); 1898 PetscFunctionReturn(0); 1899 } 1900 1901 /*@C 1902 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd() 1903 1904 Collective 1905 1906 Input Parameters: 1907 + sf - star forest 1908 . unit - data type 1909 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1910 1911 Output Parameter: 1912 . 
leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Scatter is a Bcast with MPI_REPLACE over the multi-SF (one unit per (root,leaf) edge) */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Parameter:
.
leafdata - leaf data to be update with personal data from each respective root 1943 1944 Level: intermediate 1945 1946 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1947 @*/ 1948 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1949 { 1950 PetscErrorCode ierr; 1951 PetscSF multi = NULL; 1952 1953 PetscFunctionBegin; 1954 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1955 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1956 ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 1957 PetscFunctionReturn(0); 1958 } 1959 1960 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 1961 { 1962 PetscInt i, n, nleaves; 1963 const PetscInt *ilocal = NULL; 1964 PetscHSetI seen; 1965 PetscErrorCode ierr; 1966 1967 PetscFunctionBegin; 1968 if (PetscDefined(USE_DEBUG)) { 1969 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 1970 ierr = PetscHSetICreate(&seen);CHKERRQ(ierr); 1971 for (i = 0; i < nleaves; i++) { 1972 const PetscInt leaf = ilocal ? ilocal[i] : i; 1973 ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr); 1974 } 1975 ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr); 1976 PetscCheckFalse(n != nleaves,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique"); 1977 ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr); 1978 } 1979 PetscFunctionReturn(0); 1980 } 1981 1982 /*@ 1983 PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view 1984 1985 Input Parameters: 1986 + sfA - The first PetscSF 1987 - sfB - The second PetscSF 1988 1989 Output Parameters: 1990 . sfBA - The composite SF 1991 1992 Level: developer 1993 1994 Notes: 1995 Currently, the two SFs must be defined on congruent communicators and they must be true star 1996 forests, i.e. the same leaf is not connected with different roots. 
1997 1998 sfA's leaf space and sfB's root space might be partially overlapped. The composition builds 1999 a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected 2000 nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a 2001 Bcast on sfA, then a Bcast on sfB, on connected nodes. 2002 2003 .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph() 2004 @*/ 2005 PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA) 2006 { 2007 PetscErrorCode ierr; 2008 const PetscSFNode *remotePointsA,*remotePointsB; 2009 PetscSFNode *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB; 2010 const PetscInt *localPointsA,*localPointsB; 2011 PetscInt *localPointsBA; 2012 PetscInt i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA; 2013 PetscBool denseB; 2014 2015 PetscFunctionBegin; 2016 PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1); 2017 PetscSFCheckGraphSet(sfA,1); 2018 PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2); 2019 PetscSFCheckGraphSet(sfB,2); 2020 PetscCheckSameComm(sfA,1,sfB,2); 2021 PetscValidPointer(sfBA,3); 2022 ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr); 2023 ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr); 2024 2025 ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr); 2026 ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr); 2027 if (localPointsA) { 2028 ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr); 2029 for (i=0; i<numRootsB; i++) { 2030 reorderedRemotePointsA[i].rank = -1; 2031 reorderedRemotePointsA[i].index = -1; 2032 } 2033 for (i=0; i<numLeavesA; i++) { 2034 if (localPointsA[i] >= numRootsB) continue; 2035 reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i]; 2036 } 2037 remotePointsA = reorderedRemotePointsA; 2038 } 2039 ierr = 
PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr); 2040 ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr); 2041 ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr); 2042 ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr); 2043 ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr); 2044 2045 denseB = (PetscBool)!localPointsB; 2046 for (i=0,numLeavesBA=0; i<numLeavesB; i++) { 2047 if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE; 2048 else numLeavesBA++; 2049 } 2050 if (denseB) { 2051 localPointsBA = NULL; 2052 remotePointsBA = leafdataB; 2053 } else { 2054 ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr); 2055 ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr); 2056 for (i=0,numLeavesBA=0; i<numLeavesB; i++) { 2057 const PetscInt l = localPointsB ? localPointsB[i] : i; 2058 2059 if (leafdataB[l-minleaf].rank == -1) continue; 2060 remotePointsBA[numLeavesBA] = leafdataB[l-minleaf]; 2061 localPointsBA[numLeavesBA] = l; 2062 numLeavesBA++; 2063 } 2064 ierr = PetscFree(leafdataB);CHKERRQ(ierr); 2065 } 2066 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr); 2067 ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr); 2068 ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr); 2069 PetscFunctionReturn(0); 2070 } 2071 2072 /*@ 2073 PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one 2074 2075 Input Parameters: 2076 + sfA - The first PetscSF 2077 - sfB - The second PetscSF 2078 2079 Output Parameters: 2080 . sfBA - The composite SF. 2081 2082 Level: developer 2083 2084 Notes: 2085 Currently, the two SFs must be defined on congruent communicators and they must be true star 2086 forests, i.e. the same leaf is not connected with different roots. 
Even more, all roots of the 2087 second SF must have a degree of 1, i.e., no roots have more than one leaf connected. 2088 2089 sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds 2090 a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected 2091 roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then 2092 a Reduce on sfB, on connected roots. 2093 2094 .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF() 2095 @*/ 2096 PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA) 2097 { 2098 PetscErrorCode ierr; 2099 const PetscSFNode *remotePointsA,*remotePointsB; 2100 PetscSFNode *remotePointsBA; 2101 const PetscInt *localPointsA,*localPointsB; 2102 PetscSFNode *reorderedRemotePointsA = NULL; 2103 PetscInt i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA; 2104 MPI_Op op; 2105 #if defined(PETSC_USE_64BIT_INDICES) 2106 PetscBool iswin; 2107 #endif 2108 2109 PetscFunctionBegin; 2110 PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1); 2111 PetscSFCheckGraphSet(sfA,1); 2112 PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2); 2113 PetscSFCheckGraphSet(sfB,2); 2114 PetscCheckSameComm(sfA,1,sfB,2); 2115 PetscValidPointer(sfBA,3); 2116 ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr); 2117 ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr); 2118 2119 ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr); 2120 ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr); 2121 2122 /* TODO: Check roots of sfB have degree of 1 */ 2123 /* Once we implement it, we can replace the MPI_MAXLOC 2124 with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect. 
2125 We use MPI_MAXLOC only to have a deterministic output from this routine if 2126 the root condition is not meet. 2127 */ 2128 op = MPI_MAXLOC; 2129 #if defined(PETSC_USE_64BIT_INDICES) 2130 /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */ 2131 ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr); 2132 if (iswin) op = MPI_REPLACE; 2133 #endif 2134 2135 ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr); 2136 ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr); 2137 for (i=0; i<maxleaf - minleaf + 1; i++) { 2138 reorderedRemotePointsA[i].rank = -1; 2139 reorderedRemotePointsA[i].index = -1; 2140 } 2141 if (localPointsA) { 2142 for (i=0; i<numLeavesA; i++) { 2143 if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; 2144 reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i]; 2145 } 2146 } else { 2147 for (i=0; i<numLeavesA; i++) { 2148 if (i > maxleaf || i < minleaf) continue; 2149 reorderedRemotePointsA[i - minleaf] = remotePointsA[i]; 2150 } 2151 } 2152 2153 ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr); 2154 ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr); 2155 for (i=0; i<numRootsB; i++) { 2156 remotePointsBA[i].rank = -1; 2157 remotePointsBA[i].index = -1; 2158 } 2159 2160 ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr); 2161 ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr); 2162 ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr); 2163 for (i=0,numLeavesBA=0; i<numRootsB; i++) { 2164 if (remotePointsBA[i].rank == -1) continue; 2165 remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank; 2166 remotePointsBA[numLeavesBA].index = remotePointsBA[i].index; 2167 localPointsBA[numLeavesBA] = i; 2168 numLeavesBA++; 2169 } 2170 ierr = 
PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr); 2171 ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr); 2172 ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr); 2173 PetscFunctionReturn(0); 2174 } 2175 2176 /* 2177 PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF 2178 2179 Input Parameters: 2180 . sf - The global PetscSF 2181 2182 Output Parameters: 2183 . out - The local PetscSF 2184 */ 2185 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out) 2186 { 2187 MPI_Comm comm; 2188 PetscMPIInt myrank; 2189 const PetscInt *ilocal; 2190 const PetscSFNode *iremote; 2191 PetscInt i,j,nroots,nleaves,lnleaves,*lilocal; 2192 PetscSFNode *liremote; 2193 PetscSF lsf; 2194 PetscErrorCode ierr; 2195 2196 PetscFunctionBegin; 2197 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 2198 if (sf->ops->CreateLocalSF) { 2199 ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr); 2200 } else { 2201 /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */ 2202 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 2203 ierr = MPI_Comm_rank(comm,&myrank);CHKERRMPI(ierr); 2204 2205 /* Find out local edges and build a local SF */ 2206 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 2207 for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;} 2208 ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr); 2209 ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr); 2210 2211 for (i=j=0; i<nleaves; i++) { 2212 if (iremote[i].rank == (PetscInt)myrank) { 2213 lilocal[j] = ilocal? 
ilocal[i] : i; /* ilocal=NULL for contiguous storage */ 2214 liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */ 2215 liremote[j].index = iremote[i].index; 2216 j++; 2217 } 2218 } 2219 ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr); 2220 ierr = PetscSFSetFromOptions(lsf);CHKERRQ(ierr); 2221 ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 2222 ierr = PetscSFSetUp(lsf);CHKERRQ(ierr); 2223 *out = lsf; 2224 } 2225 PetscFunctionReturn(0); 2226 } 2227 2228 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */ 2229 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata) 2230 { 2231 PetscErrorCode ierr; 2232 PetscMemType rootmtype,leafmtype; 2233 2234 PetscFunctionBegin; 2235 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 2236 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 2237 ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr); 2238 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 2239 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 2240 if (sf->ops->BcastToZero) { 2241 ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr); 2242 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type"); 2243 ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr); 2244 PetscFunctionReturn(0); 2245 } 2246 2247