1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/hashseti.h> 3 #include <petsc/private/viewerimpl.h> 4 #include <petscctable.h> 5 6 #if defined(PETSC_HAVE_CUDA) 7 #include <cuda_runtime.h> 8 #endif 9 10 #if defined(PETSC_HAVE_HIP) 11 #include <hip/hip_runtime.h> 12 #endif 13 14 #if defined(PETSC_CLANG_STATIC_ANALYZER) 15 void PetscSFCheckGraphSet(PetscSF,int); 16 #else 17 #if defined(PETSC_USE_DEBUG) 18 # define PetscSFCheckGraphSet(sf,arg) do { \ 19 if (PetscUnlikely(!(sf)->graphset)) \ 20 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \ 21 } while (0) 22 #else 23 # define PetscSFCheckGraphSet(sf,arg) do {} while (0) 24 #endif 25 #endif 26 27 const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL}; 28 29 /*@ 30 PetscSFCreate - create a star forest communication context 31 32 Collective 33 34 Input Parameter: 35 . comm - communicator on which the star forest will operate 36 37 Output Parameter: 38 . sf - new star forest context 39 40 Options Database Keys: 41 + -sf_type basic -Use MPI persistent Isend/Irecv for communication (Default) 42 . -sf_type window -Use MPI-3 one-sided window for communication 43 - -sf_type neighbor -Use MPI-3 neighborhood collectives for communication 44 45 Level: intermediate 46 47 Notes: 48 When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv, 49 MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special 50 SFs are optimized and they have better performance than general SFs. 
51 52 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy() 53 @*/ 54 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf) 55 { 56 PetscErrorCode ierr; 57 PetscSF b; 58 59 PetscFunctionBegin; 60 PetscValidPointer(sf,2); 61 ierr = PetscSFInitializePackage();CHKERRQ(ierr); 62 63 ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr); 64 65 b->nroots = -1; 66 b->nleaves = -1; 67 b->minleaf = PETSC_MAX_INT; 68 b->maxleaf = PETSC_MIN_INT; 69 b->nranks = -1; 70 b->rankorder = PETSC_TRUE; 71 b->ingroup = MPI_GROUP_NULL; 72 b->outgroup = MPI_GROUP_NULL; 73 b->graphset = PETSC_FALSE; 74 #if defined(PETSC_HAVE_DEVICE) 75 b->use_gpu_aware_mpi = use_gpu_aware_mpi; 76 b->use_stream_aware_mpi = PETSC_FALSE; 77 b->unknown_input_stream= PETSC_FALSE; 78 #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 79 b->backend = PETSCSF_BACKEND_KOKKOS; 80 #elif defined(PETSC_HAVE_CUDA) 81 b->backend = PETSCSF_BACKEND_CUDA; 82 #elif defined(PETSC_HAVE_HIP) 83 b->backend = PETSCSF_BACKEND_HIP; 84 #endif 85 86 #if defined(PETSC_HAVE_NVSHMEM) 87 b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */ 88 b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */ 89 ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL);CHKERRQ(ierr); 90 ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL);CHKERRQ(ierr); 91 #endif 92 #endif 93 b->vscat.from_n = -1; 94 b->vscat.to_n = -1; 95 b->vscat.unit = MPIU_SCALAR; 96 *sf = b; 97 PetscFunctionReturn(0); 98 } 99 100 /*@ 101 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 102 103 Collective 104 105 Input Parameter: 106 . 
sf - star forest 107 108 Level: advanced 109 110 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy() 111 @*/ 112 PetscErrorCode PetscSFReset(PetscSF sf) 113 { 114 PetscErrorCode ierr; 115 116 PetscFunctionBegin; 117 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 118 if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);} 119 sf->nroots = -1; 120 sf->nleaves = -1; 121 sf->minleaf = PETSC_MAX_INT; 122 sf->maxleaf = PETSC_MIN_INT; 123 sf->mine = NULL; 124 sf->remote = NULL; 125 sf->graphset = PETSC_FALSE; 126 ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr); 127 ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr); 128 sf->nranks = -1; 129 ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr); 130 sf->degreeknown = PETSC_FALSE; 131 ierr = PetscFree(sf->degree);CHKERRQ(ierr); 132 if (sf->ingroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRMPI(ierr);} 133 if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRMPI(ierr);} 134 if (sf->multi) sf->multi->multi = NULL; 135 ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr); 136 ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr); 137 138 #if defined(PETSC_HAVE_DEVICE) 139 for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);CHKERRQ(ierr);} 140 #endif 141 142 sf->setupcalled = PETSC_FALSE; 143 PetscFunctionReturn(0); 144 } 145 146 /*@C 147 PetscSFSetType - Set the PetscSF communication implementation 148 149 Collective on PetscSF 150 151 Input Parameters: 152 + sf - the PetscSF context 153 - type - a known method 154 155 Options Database Key: 156 . 
-sf_type <type> - Sets the method; use -help for a list 157 of available methods (for instance, window, basic, neighbor) 158 159 Notes: 160 See "include/petscsf.h" for available methods (for instance) 161 + PETSCSFWINDOW - MPI-2/3 one-sided 162 - PETSCSFBASIC - basic implementation using MPI-1 two-sided 163 164 Level: intermediate 165 166 .seealso: PetscSFType, PetscSFCreate() 167 @*/ 168 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type) 169 { 170 PetscErrorCode ierr,(*r)(PetscSF); 171 PetscBool match; 172 173 PetscFunctionBegin; 174 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 175 PetscValidCharPointer(type,2); 176 177 ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr); 178 if (match) PetscFunctionReturn(0); 179 180 ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr); 181 PetscCheckFalse(!r,PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type); 182 /* Destroy the previous PetscSF implementation context */ 183 if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);} 184 ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr); 185 ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr); 186 ierr = (*r)(sf);CHKERRQ(ierr); 187 PetscFunctionReturn(0); 188 } 189 190 /*@C 191 PetscSFGetType - Get the PetscSF communication implementation 192 193 Not Collective 194 195 Input Parameter: 196 . sf - the PetscSF context 197 198 Output Parameter: 199 . type - the PetscSF type name 200 201 Level: intermediate 202 203 .seealso: PetscSFSetType(), PetscSFCreate() 204 @*/ 205 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 206 { 207 PetscFunctionBegin; 208 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1); 209 PetscValidPointer(type,2); 210 *type = ((PetscObject)sf)->type_name; 211 PetscFunctionReturn(0); 212 } 213 214 /*@C 215 PetscSFDestroy - destroy star forest 216 217 Collective 218 219 Input Parameter: 220 . 
sf - address of star forest 221 222 Level: intermediate 223 224 .seealso: PetscSFCreate(), PetscSFReset() 225 @*/ 226 PetscErrorCode PetscSFDestroy(PetscSF *sf) 227 { 228 PetscErrorCode ierr; 229 230 PetscFunctionBegin; 231 if (!*sf) PetscFunctionReturn(0); 232 PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1); 233 if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);} 234 ierr = PetscSFReset(*sf);CHKERRQ(ierr); 235 if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);} 236 ierr = PetscSFDestroy(&(*sf)->vscat.lsf);CHKERRQ(ierr); 237 if ((*sf)->vscat.bs > 1) {ierr = MPI_Type_free(&(*sf)->vscat.unit);CHKERRMPI(ierr);} 238 ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr); 239 PetscFunctionReturn(0); 240 } 241 242 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 243 { 244 PetscInt i, nleaves; 245 PetscMPIInt size; 246 const PetscInt *ilocal; 247 const PetscSFNode *iremote; 248 PetscErrorCode ierr; 249 250 PetscFunctionBegin; 251 if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0); 252 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 253 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr); 254 for (i = 0; i < nleaves; i++) { 255 const PetscInt rank = iremote[i].rank; 256 const PetscInt remote = iremote[i].index; 257 const PetscInt leaf = ilocal ? 
ilocal[i] : i; 258 PetscCheckFalse(rank < 0 || rank >= size,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)",rank,i,size); 259 PetscCheckFalse(remote < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0",remote,i); 260 PetscCheckFalse(leaf < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0",leaf,i); 261 } 262 PetscFunctionReturn(0); 263 } 264 265 /*@ 266 PetscSFSetUp - set up communication structures 267 268 Collective 269 270 Input Parameter: 271 . sf - star forest communication object 272 273 Level: beginner 274 275 .seealso: PetscSFSetFromOptions(), PetscSFSetType() 276 @*/ 277 PetscErrorCode PetscSFSetUp(PetscSF sf) 278 { 279 PetscErrorCode ierr; 280 281 PetscFunctionBegin; 282 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 283 PetscSFCheckGraphSet(sf,1); 284 if (sf->setupcalled) PetscFunctionReturn(0); 285 ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 286 ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr); 287 if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} /* Zero all sf->ops */ 288 if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);} 289 #if defined(PETSC_HAVE_CUDA) 290 if (sf->backend == PETSCSF_BACKEND_CUDA) { 291 sf->ops->Malloc = PetscSFMalloc_CUDA; 292 sf->ops->Free = PetscSFFree_CUDA; 293 } 294 #endif 295 #if defined(PETSC_HAVE_HIP) 296 if (sf->backend == PETSCSF_BACKEND_HIP) { 297 sf->ops->Malloc = PetscSFMalloc_HIP; 298 sf->ops->Free = PetscSFFree_HIP; 299 } 300 #endif 301 302 # 303 #if defined(PETSC_HAVE_KOKKOS) 304 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 305 sf->ops->Malloc = PetscSFMalloc_Kokkos; 306 sf->ops->Free = PetscSFFree_Kokkos; 307 } 308 #endif 309 ierr = 
PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 310 sf->setupcalled = PETSC_TRUE; 311 PetscFunctionReturn(0); 312 } 313 314 /*@ 315 PetscSFSetFromOptions - set PetscSF options using the options database 316 317 Logically Collective 318 319 Input Parameter: 320 . sf - star forest 321 322 Options Database Keys: 323 + -sf_type - implementation type, see PetscSFSetType() 324 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 325 . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also 326 use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true). 327 If true, this option only works with -use_gpu_aware_mpi 1. 328 . -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false). 329 If true, this option only works with -use_gpu_aware_mpi 1. 330 331 - -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos. 332 On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices, 333 the only available is kokkos. 334 335 Level: intermediate 336 @*/ 337 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 338 { 339 PetscSFType deft; 340 char type[256]; 341 PetscErrorCode ierr; 342 PetscBool flg; 343 344 PetscFunctionBegin; 345 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 346 ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr); 347 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 348 ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr); 349 ierr = PetscSFSetType(sf,flg ? 
type : deft);CHKERRQ(ierr); 350 ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr); 351 #if defined(PETSC_HAVE_DEVICE) 352 { 353 char backendstr[32] = {0}; 354 PetscBool isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set; 355 /* Change the defaults set in PetscSFCreate() with command line options */ 356 ierr = PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL);CHKERRQ(ierr); 357 ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr); 358 ierr = PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);CHKERRQ(ierr); 359 ierr = PetscStrcasecmp("cuda",backendstr,&isCuda);CHKERRQ(ierr); 360 ierr = PetscStrcasecmp("kokkos",backendstr,&isKokkos);CHKERRQ(ierr); 361 ierr = PetscStrcasecmp("hip",backendstr,&isHip);CHKERRQ(ierr); 362 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 363 if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 364 else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 365 else if (isHip) sf->backend = PETSCSF_BACKEND_HIP; 366 else PetscCheckFalse(set,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr); 367 #elif defined(PETSC_HAVE_KOKKOS) 368 PetscCheckFalse(set && !isKokkos,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. 
You can only choose kokkos", backendstr); 369 #endif 370 } 371 #endif 372 if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);} 373 ierr = PetscOptionsEnd();CHKERRQ(ierr); 374 PetscFunctionReturn(0); 375 } 376 377 /*@ 378 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 379 380 Logically Collective 381 382 Input Parameters: 383 + sf - star forest 384 - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic) 385 386 Level: advanced 387 388 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin() 389 @*/ 390 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg) 391 { 392 PetscFunctionBegin; 393 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 394 PetscValidLogicalCollectiveBool(sf,flg,2); 395 PetscCheckFalse(sf->multi,PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 396 sf->rankorder = flg; 397 PetscFunctionReturn(0); 398 } 399 400 /*@C 401 PetscSFSetGraph - Set a parallel star forest 402 403 Collective 404 405 Input Parameters: 406 + sf - star forest 407 . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 408 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 409 . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced 410 during setup in debug mode) 411 . localmode - copy mode for ilocal 412 . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 413 during setup in debug mode) 414 - remotemode - copy mode for iremote 415 416 Level: intermediate 417 418 Notes: 419 Leaf indices in ilocal must be unique, otherwise an error occurs. 
420 421 Input arrays ilocal and iremote follow the PetscCopyMode semantics. 422 In particular, if localmode/remotemode is PETSC_OWN_POINTER or PETSC_USE_POINTER, 423 PETSc might modify the respective array; 424 if PETSC_USE_POINTER, the user must delete the array after PetscSFDestroy(). 425 Only if PETSC_COPY_VALUES is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed). 426 427 Fortran Notes: 428 In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode. 429 430 Developer Notes: 431 We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf. 432 This also allows to compare leaf sets of two SFs easily. 433 434 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph() 435 @*/ 436 PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,PetscInt *ilocal,PetscCopyMode localmode,PetscSFNode *iremote,PetscCopyMode remotemode) 437 { 438 PetscBool unique, contiguous; 439 PetscErrorCode ierr; 440 441 PetscFunctionBegin; 442 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 443 if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4); 444 if (nleaves > 0) PetscValidPointer(iremote,6); 445 PetscCheckFalse(nroots < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %" PetscInt_FMT ", cannot be negative",nroots); 446 PetscCheckFalse(nleaves < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %" PetscInt_FMT ", cannot be negative",nleaves); 447 PetscCheck(localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong localmode %d",localmode); 448 PetscCheck(remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong remotemode %d",remotemode); 449 450 if (sf->nroots >= 0) { /* Reset only if graph already set */ 451 ierr = PetscSFReset(sf);CHKERRQ(ierr); 452 } 453 454 ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 455 456 
sf->nroots = nroots; 457 sf->nleaves = nleaves; 458 459 if (localmode == PETSC_COPY_VALUES && ilocal) { 460 PetscInt *tlocal = NULL; 461 462 ierr = PetscMalloc1(nleaves,&tlocal);CHKERRQ(ierr); 463 ierr = PetscArraycpy(tlocal,ilocal,nleaves);CHKERRQ(ierr); 464 ilocal = tlocal; 465 } 466 if (remotemode == PETSC_COPY_VALUES) { 467 PetscSFNode *tremote = NULL; 468 469 ierr = PetscMalloc1(nleaves,&tremote);CHKERRQ(ierr); 470 ierr = PetscArraycpy(tremote,iremote,nleaves);CHKERRQ(ierr); 471 iremote = tremote; 472 } 473 474 if (nleaves && ilocal) { 475 PetscSFNode work; 476 477 ierr = PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work);CHKERRQ(ierr); 478 ierr = PetscSortedCheckDupsInt(nleaves, ilocal, &unique);CHKERRQ(ierr); 479 unique = PetscNot(unique); 480 PetscCheck(sf->allow_multi_leaves || unique,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Input ilocal has duplicate entries which is not allowed for this PetscSF"); 481 sf->minleaf = ilocal[0]; 482 sf->maxleaf = ilocal[nleaves-1]; 483 contiguous = (PetscBool) (unique && ilocal[0] == 0 && ilocal[nleaves-1] == nleaves-1); 484 } else { 485 sf->minleaf = 0; 486 sf->maxleaf = nleaves - 1; 487 unique = PETSC_TRUE; 488 contiguous = PETSC_TRUE; 489 } 490 491 if (contiguous) { 492 if (localmode == PETSC_USE_POINTER) { 493 ilocal = NULL; 494 } else { 495 ierr = PetscFree(ilocal);CHKERRQ(ierr); 496 } 497 } 498 sf->mine = ilocal; 499 if (localmode == PETSC_USE_POINTER) { 500 sf->mine_alloc = NULL; 501 } else { 502 sf->mine_alloc = ilocal; 503 } 504 sf->remote = iremote; 505 if (remotemode == PETSC_USE_POINTER) { 506 sf->remote_alloc = NULL; 507 } else { 508 sf->remote_alloc = iremote; 509 } 510 ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 511 sf->graphset = PETSC_TRUE; 512 PetscFunctionReturn(0); 513 } 514 515 /*@ 516 PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern 517 518 Collective 519 520 Input Parameters: 521 + sf - The PetscSF 522 . 
map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL) 523 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL 524 525 Notes: 526 It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map. 527 n and N are local and global sizes of x respectively. 528 529 With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to 530 sequential vectors y on all ranks. 531 532 With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a 533 sequential vector y on rank 0. 534 535 In above cases, entries of x are roots and entries of y are leaves. 536 537 With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine 538 creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 539 of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does 540 not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data 541 items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines. 542 543 In this case, roots and leaves are symmetric. 
544 545 Level: intermediate 546 @*/ 547 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern) 548 { 549 MPI_Comm comm; 550 PetscInt n,N,res[2]; 551 PetscMPIInt rank,size; 552 PetscSFType type; 553 PetscErrorCode ierr; 554 555 PetscFunctionBegin; 556 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 557 if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map,2); 558 ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr); 559 PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL,comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %d",pattern); 560 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 561 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 562 563 if (pattern == PETSCSF_PATTERN_ALLTOALL) { 564 type = PETSCSFALLTOALL; 565 ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr); 566 ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr); 567 ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr); 568 ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr); 569 } else { 570 ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr); 571 ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr); 572 res[0] = n; 573 res[1] = -n; 574 /* Check if n are same over all ranks so that we can optimize it */ 575 ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRMPI(ierr); 576 if (res[0] == -res[1]) { /* same n */ 577 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 578 } else { 579 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 580 } 581 ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr); 582 } 583 ierr = PetscSFSetType(sf,type);CHKERRQ(ierr); 584 585 sf->pattern = pattern; 586 sf->mine = NULL; /* Contiguous */ 587 588 /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 589 Also set other easy stuff. 
590 */ 591 if (pattern == PETSCSF_PATTERN_ALLGATHER) { 592 sf->nleaves = N; 593 sf->nroots = n; 594 sf->nranks = size; 595 sf->minleaf = 0; 596 sf->maxleaf = N - 1; 597 } else if (pattern == PETSCSF_PATTERN_GATHER) { 598 sf->nleaves = rank ? 0 : N; 599 sf->nroots = n; 600 sf->nranks = rank ? 0 : size; 601 sf->minleaf = 0; 602 sf->maxleaf = rank ? -1 : N - 1; 603 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 604 sf->nleaves = size; 605 sf->nroots = size; 606 sf->nranks = size; 607 sf->minleaf = 0; 608 sf->maxleaf = size - 1; 609 } 610 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 611 sf->graphset = PETSC_TRUE; 612 PetscFunctionReturn(0); 613 } 614 615 /*@ 616 PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map 617 618 Collective 619 620 Input Parameter: 621 . sf - star forest to invert 622 623 Output Parameter: 624 . isf - inverse of sf 625 626 Level: advanced 627 628 Notes: 629 All roots must have degree 1. 630 631 The local space may be a permutation, but cannot be sparse. 
632 633 .seealso: PetscSFSetGraph() 634 @*/ 635 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf) 636 { 637 PetscErrorCode ierr; 638 PetscMPIInt rank; 639 PetscInt i,nroots,nleaves,maxlocal,count,*newilocal; 640 const PetscInt *ilocal; 641 PetscSFNode *roots,*leaves; 642 643 PetscFunctionBegin; 644 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 645 PetscSFCheckGraphSet(sf,1); 646 PetscValidPointer(isf,2); 647 648 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 649 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 650 651 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 652 ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr); 653 for (i=0; i<maxlocal; i++) { 654 leaves[i].rank = rank; 655 leaves[i].index = i; 656 } 657 for (i=0; i <nroots; i++) { 658 roots[i].rank = -1; 659 roots[i].index = -1; 660 } 661 ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr); 662 ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr); 663 664 /* Check whether our leaves are sparse */ 665 for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++; 666 if (count == nroots) newilocal = NULL; 667 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). 
*/ 668 ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr); 669 for (i=0,count=0; i<nroots; i++) { 670 if (roots[i].rank >= 0) { 671 newilocal[count] = i; 672 roots[count].rank = roots[i].rank; 673 roots[count].index = roots[i].index; 674 count++; 675 } 676 } 677 } 678 679 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr); 680 ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr); 681 ierr = PetscFree2(roots,leaves);CHKERRQ(ierr); 682 PetscFunctionReturn(0); 683 } 684 685 /*@ 686 PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph 687 688 Collective 689 690 Input Parameters: 691 + sf - communication object to duplicate 692 - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption) 693 694 Output Parameter: 695 . newsf - new communication object 696 697 Level: beginner 698 699 .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph() 700 @*/ 701 PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf) 702 { 703 PetscSFType type; 704 PetscErrorCode ierr; 705 MPI_Datatype dtype=MPIU_SCALAR; 706 707 PetscFunctionBegin; 708 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 709 PetscValidLogicalCollectiveEnum(sf,opt,2); 710 PetscValidPointer(newsf,3); 711 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr); 712 ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 713 if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);} 714 if (opt == PETSCSF_DUPLICATE_GRAPH) { 715 PetscSFCheckGraphSet(sf,1); 716 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 717 PetscInt nroots,nleaves; 718 const PetscInt *ilocal; 719 const PetscSFNode *iremote; 720 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 721 ierr = 
PetscSFSetGraph(*newsf,nroots,nleaves,(PetscInt*)ilocal,PETSC_COPY_VALUES,(PetscSFNode*)iremote,PETSC_COPY_VALUES);CHKERRQ(ierr); 722 } else { 723 ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr); 724 } 725 } 726 /* Since oldtype is committed, so is newtype, according to MPI */ 727 if (sf->vscat.bs > 1) {ierr = MPI_Type_dup(sf->vscat.unit,&dtype);CHKERRMPI(ierr);} 728 (*newsf)->vscat.bs = sf->vscat.bs; 729 (*newsf)->vscat.unit = dtype; 730 (*newsf)->vscat.to_n = sf->vscat.to_n; 731 (*newsf)->vscat.from_n = sf->vscat.from_n; 732 /* Do not copy lsf. Build it on demand since it is rarely used */ 733 734 #if defined(PETSC_HAVE_DEVICE) 735 (*newsf)->backend = sf->backend; 736 (*newsf)->unknown_input_stream= sf->unknown_input_stream; 737 (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi; 738 (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi; 739 #endif 740 if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);} 741 /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */ 742 PetscFunctionReturn(0); 743 } 744 745 /*@C 746 PetscSFGetGraph - Get the graph specifying a parallel star forest 747 748 Not Collective 749 750 Input Parameter: 751 . sf - star forest 752 753 Output Parameters: 754 + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 755 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 756 . 
ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage) 757 - iremote - remote locations of root vertices for each leaf on the current process 758 759 Notes: 760 We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet 761 762 The returned ilocal/iremote might contain values in different order than the input ones in PetscSFSetGraph(), 763 see its manpage for details. 764 765 Fortran Notes: 766 The returned iremote array is a copy and must be deallocated after use. Consequently, if you 767 want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array. 768 769 To check for a NULL ilocal use 770 $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then 771 772 Level: intermediate 773 774 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph() 775 @*/ 776 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 777 { 778 PetscErrorCode ierr; 779 780 PetscFunctionBegin; 781 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 782 if (sf->ops->GetGraph) { 783 ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr); 784 } else { 785 if (nroots) *nroots = sf->nroots; 786 if (nleaves) *nleaves = sf->nleaves; 787 if (ilocal) *ilocal = sf->mine; 788 if (iremote) *iremote = sf->remote; 789 } 790 PetscFunctionReturn(0); 791 } 792 793 /*@ 794 PetscSFGetLeafRange - Get the active leaf ranges 795 796 Not Collective 797 798 Input Parameter: 799 . sf - star forest 800 801 Output Parameters: 802 + minleaf - minimum active leaf on this process. Return 0 if there are no leaves. 803 - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves. 
   Level: developer

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  /* Both outputs are optional; the cached extrema are returned without recomputation */
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFViewFromOptions - View from Options

   Collective on PetscSF

   Input Parameters:
+  A - the star forest
.  obj - Optional object
-  name - command line option

   Level: intermediate
.seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
  /* Thin forwarding wrapper; all option processing happens in PetscObjectViewFromOptions() */
  ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFView - view a star forest

   Collective

   Input Parameters:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Default to the stdout viewer on the SF's communicator when none is supplied */
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      /* One line per leaf: "leaf <- (remote rank, remote root index)"; leaves are implicit 0..nleaves-1 when sf->mine is NULL */
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        /* Print per-rank edge lists in ascending rank order; sort a copy so sf->ranks stays untouched */
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %" PetscInt_FMT " edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  /* Give the implementation a chance to print type-specific information */
  if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
.  roffset - offset in rmine/rremote for each rank (length nranks+1)
.
rmine - concatenated array holding local indices referencing each remote rank
-  rremote - concatenated array holding remote indices referenced for each remote rank

   Level: developer

.seealso: PetscSFGetLeafRanks()
@*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCheckFalse(!sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetRootRanks) {
    /* Implementation-specific rank data (e.g., a type that stores these differently) */
    ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
  } else {
    /* The generic implementation: return the arrays built by PetscSFSetUpRanks(); all outputs optional */
    if (nranks)  *nranks  = sf->nranks;
    if (ranks)   *ranks   = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine)   *rmine   = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

   Not Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  niranks - number of leaf ranks referencing roots on this process
.  iranks - array of ranks
.
ioffset - offset in irootloc for each rank (length niranks+1)
-  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCheckFalse(!sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetLeafRanks) {
    ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
  } else {
    /* Unlike PetscSFGetRootRanks() there is no generic fallback: leaf-rank data exists only in types that build it */
    PetscSFType type;
    ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
  }
  PetscFunctionReturn(0);
}

/* Linear membership test: PETSC_TRUE iff needle occurs in list[0..n-1].
   Used below where n (a distinguished-group size) is expected to be small. */
static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
  PetscInt i;
  for (i=0; i<n; i++) {
    if (needle == list[i]) return PETSC_TRUE;
  }
  return PETSC_FALSE;
}

/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.
   Collective

   Input Parameters:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr);
  /* Count, per remote rank, how many of my leaves reference it */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank (PetscTable cannot store key 0) */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--; /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group   group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate dgroup's members into ranks of the SF's communicator */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRMPI(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRMPI(ierr);}
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition separately so distinguished ranks stay in front */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0; /* reused below as a per-rank fill cursor */
  }
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit: consecutive leaves often reference the same rank, so cache the last lookup */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks; try the distinguished block first */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    PetscCheckFalse(irank < 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %" PetscInt_FMT " in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  PetscCheckFalse(sf->nranks < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks: build an auxiliary SF with one root and
       one leaf per referenced rank, whose in-degree is then the incoming-rank count */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
    /*
.seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    *multi = sf->multi;
    sf->multi->multi = sf->multi; /* the multi-SF is its own multi-SF */
    PetscFunctionReturn(0);
  }
  if (!sf->multi) { /* Build and cache the multi-SF on first request */
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    /* inoffset[] is the prefix sum of root degrees: each root r owns multi-root slots
       [inoffset[r], inoffset[r+1]); fetch-and-add of ones hands each leaf a distinct slot */
    ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
      for (i=0; i<sf->nroots; i++) {
        PetscCheckFalse(inoffset[i] + indegree[i] != inoffset[i+1],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
      }
    }
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    sf->multi->multi = sf->multi;
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) {        /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      /* Gather, at each multi-root slot, the rank of the leaf that claimed it */
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
    }
    ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices

   Collective

   Input Parameters:
+  sf - original star forest
.  nselected  - number of selected roots on this process
-  selected   - indices of the selected roots on this process

   Output Parameter:
.  esf - new star forest

   Level: advanced

   Note:
   To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
   be done by calling PetscSFGetGraph().

.seealso: PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
{
  PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
  const PetscInt    *ilocal;
  signed char       *rootdata,*leafdata,*leafmem;
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  MPI_Comm          comm;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (nselected) PetscValidPointer(selected,3);
  PetscValidPointer(esf,4);

  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);

  if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or
out of range indices */ 1299 PetscBool dups; 1300 ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr); 1301 PetscCheckFalse(dups,comm,PETSC_ERR_ARG_WRONG,"selected[] has dups"); 1302 for (i=0; i<nselected; i++) 1303 PetscCheckFalse(selected[i] < 0 || selected[i] >= nroots,comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")",selected[i],nroots); 1304 } 1305 1306 if (sf->ops->CreateEmbeddedRootSF) { 1307 ierr = (*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf);CHKERRQ(ierr); 1308 } else { 1309 /* A generic version of creating embedded sf */ 1310 ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 1311 maxlocal = maxleaf - minleaf + 1; 1312 ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr); 1313 leafdata = leafmem - minleaf; 1314 /* Tag selected roots and bcast to leaves */ 1315 for (i=0; i<nselected; i++) rootdata[selected[i]] = 1; 1316 ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 1317 ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 1318 1319 /* Build esf with leaves that are still connected */ 1320 esf_nleaves = 0; 1321 for (i=0; i<nleaves; i++) { 1322 j = ilocal ? ilocal[i] : i; 1323 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1324 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1325 */ 1326 esf_nleaves += (leafdata[j] ? 1 : 0); 1327 } 1328 ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr); 1329 ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr); 1330 for (i=n=0; i<nleaves; i++) { 1331 j = ilocal ? 
ilocal[i] : i; 1332 if (leafdata[j]) { 1333 new_ilocal[n] = j; 1334 new_iremote[n].rank = iremote[i].rank; 1335 new_iremote[n].index = iremote[i].index; 1336 ++n; 1337 } 1338 } 1339 ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr); 1340 ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr); 1341 ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1342 ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr); 1343 } 1344 ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1345 PetscFunctionReturn(0); 1346 } 1347 1348 /*@C 1349 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1350 1351 Collective 1352 1353 Input Parameters: 1354 + sf - original star forest 1355 . nselected - number of selected leaves on this process 1356 - selected - indices of the selected leaves on this process 1357 1358 Output Parameter: 1359 . newsf - new star forest 1360 1361 Level: advanced 1362 1363 .seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph() 1364 @*/ 1365 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1366 { 1367 const PetscSFNode *iremote; 1368 PetscSFNode *new_iremote; 1369 const PetscInt *ilocal; 1370 PetscInt i,nroots,*leaves,*new_ilocal; 1371 MPI_Comm comm; 1372 PetscErrorCode ierr; 1373 1374 PetscFunctionBegin; 1375 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1376 PetscSFCheckGraphSet(sf,1); 1377 if (nselected) PetscValidPointer(selected,3); 1378 PetscValidPointer(newsf,4); 1379 1380 /* Uniq selected[] and put results in leaves[] */ 1381 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1382 ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr); 1383 ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr); 1384 ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr); 1385 PetscCheckFalse(nselected && (leaves[0] < 0 || leaves[nselected-1] 
>= sf->nleaves),comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")",leaves[0],leaves[nselected-1],sf->nleaves); 1386 1387 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1388 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) { 1389 ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr); 1390 } else { 1391 ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr); 1392 ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr); 1393 ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr); 1394 for (i=0; i<nselected; ++i) { 1395 const PetscInt l = leaves[i]; 1396 new_ilocal[i] = ilocal ? ilocal[l] : l; 1397 new_iremote[i].rank = iremote[l].rank; 1398 new_iremote[i].index = iremote[l].index; 1399 } 1400 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr); 1401 ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1402 } 1403 ierr = PetscFree(leaves);CHKERRQ(ierr); 1404 PetscFunctionReturn(0); 1405 } 1406 1407 /*@C 1408 PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd() 1409 1410 Collective on PetscSF 1411 1412 Input Parameters: 1413 + sf - star forest on which to communicate 1414 . unit - data type associated with each node 1415 . rootdata - buffer to broadcast 1416 - op - operation to use for reduction 1417 1418 Output Parameter: 1419 . leafdata - buffer to be reduced with values from each leaf's respective root 1420 1421 Level: intermediate 1422 1423 Notes: 1424 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1425 are host pointers or device pointers, which may incur a noticable cost. 
If you already knew the info, you should
   use PetscSFBcastWithMemTypeBegin() instead.
.seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin()
@*/
PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Skip SF-level event logging when the owning VecScatter logs at its own level (sf->vscat.logging) */
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
  /* Detect host vs device memory for both buffers; this probing is the cost the
     WithMemType variant avoids */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd()

   Collective on PetscSF

   Input Parameters:
+  sf - star forest on which to communicate
.  unit - data type associated with each node
.  rootmtype - memory type of rootdata
.  rootdata - buffer to broadcast
.  leafmtype - memory type of leafdata
-  op - operation to use for reduction

   Output Parameter:
.
leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFBcastEnd(), PetscSFBcastBegin()
@*/
PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Same as PetscSFBcastBegin() but trusts the caller-supplied memory types instead of probing the pointers */
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
  ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Parameter:
.
leafdata - buffer to be reduced with values from each leaf's respective root 1491 1492 Level: intermediate 1493 1494 .seealso: PetscSFSetGraph(), PetscSFReduceEnd() 1495 @*/ 1496 PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1497 { 1498 PetscErrorCode ierr; 1499 1500 PetscFunctionBegin; 1501 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1502 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);} 1503 ierr = (*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr); 1504 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);} 1505 PetscFunctionReturn(0); 1506 } 1507 1508 /*@C 1509 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 1510 1511 Collective 1512 1513 Input Parameters: 1514 + sf - star forest 1515 . unit - data type 1516 . leafdata - values to reduce 1517 - op - reduction operation 1518 1519 Output Parameter: 1520 . rootdata - result of reduction of values from all leaves of each root 1521 1522 Level: intermediate 1523 1524 Notes: 1525 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1526 are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1527 use PetscSFReduceWithMemTypeBegin() instead. 
.seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
@*/
PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Skip SF-level event logging when the owning VecScatter logs at its own level */
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
  /* Detect host vs device memory for both buffers; the WithMemType variant avoids this probing */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafmtype - memory type of leafdata
.  leafdata - values to reduce
.  rootmtype - memory type of rootdata
-  op - reduction operation

   Output Parameter:
.
rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
@*/
PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
{
  PetscErrorCode err;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  err = PetscSFSetUp(sf);CHKERRQ(err);
  /* Same as PetscSFReduceBegin() but trusts the caller-supplied memory types
     instead of probing the data pointers */
  if (!sf->vscat.logging) {
    err = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(err);
  }
  err = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(err);
  if (!sf->vscat.logging) {
    err = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(err);
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Parameter:
.
rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Complete the communication started by the matching PetscSFReduceBegin() */
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);}
  ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
   integers.

.seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype,leafupdatemtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Detect where each buffer lives (host or device) so the implementation can pick the right path */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
  /* leafdata and leafupdate must reside in the same memory type; mixed placement is unsupported */
  PetscCheckFalse(leafmtype != leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
  ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  rootmtype - memory type of rootdata
.  leafmtype - memory type of leafdata
.  leafdata - leaf values to use in reduction
.  leafupdatemtype - memory type of leafupdate
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note: See PetscSFFetchAndOpBegin() for more details.

.seealso: PetscSFFetchAndOpBegin(),PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscMemType leafupdatemtype,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Same constraint as PetscSFFetchAndOpBegin(): leafdata and leafupdate must share a memory type,
     but here the types are supplied by the caller instead of being queried */
  PetscCheckFalse(leafmtype != leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
  ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.
leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  /* Completes the fetch-and-op started by the matching Begin call */
  ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    /* sf->degree non-NULL here means a previous Begin has not been matched by an End */
    PetscCheckFalse(sf->degree,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf-sf->minleaf+1;
    ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* Summing 1 from every leaf onto its root yields each root's degree; degreetmp is shifted
       by minleaf so the reduce can index leaves in the range [minleaf,maxleaf] */
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  /* The result is only available after PetscSFComputeDegreeEnd(); return NULL in the meantime */
  *degree = NULL;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFComputeDegreeBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    /* degreetmp is allocated by PetscSFComputeDegreeBegin(); its absence means Begin was never called */
    PetscCheckFalse(!sf->degreetmp,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    /* Cache the result; subsequent Begin/End pairs return the cached sf->degree without communication */
    sf->degreeknown = PETSC_TRUE;
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
   Each multi-root is assigned index of the corresponding original root.

   Collective

   Input Parameters:
+  sf - star forest
-  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

   Output Parameters:
+  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
-  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

   Level: developer

   Notes:
   The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1816 1817 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF() 1818 @*/ 1819 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1820 { 1821 PetscSF msf; 1822 PetscInt i, j, k, nroots, nmroots; 1823 PetscErrorCode ierr; 1824 1825 PetscFunctionBegin; 1826 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1827 ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 1828 if (nroots) PetscValidIntPointer(degree,2); 1829 if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3); 1830 PetscValidPointer(multiRootsOrigNumbering,4); 1831 ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr); 1832 ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr); 1833 ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr); 1834 for (i=0,j=0,k=0; i<nroots; i++) { 1835 if (!degree[i]) continue; 1836 for (j=0; j<degree[i]; j++,k++) { 1837 (*multiRootsOrigNumbering)[k] = i; 1838 } 1839 } 1840 PetscCheckFalse(k != nmroots,PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail"); 1841 if (nMultiRoots) *nMultiRoots = nmroots; 1842 PetscFunctionReturn(0); 1843 } 1844 1845 /*@C 1846 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 1847 1848 Collective 1849 1850 Input Parameters: 1851 + sf - star forest 1852 . unit - data type 1853 - leafdata - leaf data to gather to roots 1854 1855 Output Parameter: 1856 . 
multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1857 1858 Level: intermediate 1859 1860 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1861 @*/ 1862 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1863 { 1864 PetscErrorCode ierr; 1865 PetscSF multi = NULL; 1866 1867 PetscFunctionBegin; 1868 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1869 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1870 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1871 ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr); 1872 PetscFunctionReturn(0); 1873 } 1874 1875 /*@C 1876 PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin() 1877 1878 Collective 1879 1880 Input Parameters: 1881 + sf - star forest 1882 . unit - data type 1883 - leafdata - leaf data to gather to roots 1884 1885 Output Parameter: 1886 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1887 1888 Level: intermediate 1889 1890 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1891 @*/ 1892 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1893 { 1894 PetscErrorCode ierr; 1895 PetscSF multi = NULL; 1896 1897 PetscFunctionBegin; 1898 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1899 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1900 ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr); 1901 PetscFunctionReturn(0); 1902 } 1903 1904 /*@C 1905 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd() 1906 1907 Collective 1908 1909 Input Parameters: 1910 + sf - star forest 1911 . unit - data type 1912 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1913 1914 Output Parameter: 1915 . 
leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        msf = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* A scatter is a broadcast over the multi-SF: each multi-root holds one unit per leaf,
     so every leaf receives its own personal value */
  ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(msf,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Parameter:
.
leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Finish the broadcast on the multi-SF started by PetscSFScatterBegin() */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Verify (in debug builds only) that no leaf index appears twice in sf's graph,
   i.e. the SF is a true star forest. No-op in optimized builds. */
static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
{
  PetscInt        i, n, nleaves;
  const PetscInt *ilocal = NULL;
  PetscHSetI      seen;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) {
    ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
    /* Insert every leaf into a hash set; duplicates collapse, so a size mismatch reveals repeats */
    ierr = PetscHSetICreate(&seen);CHKERRQ(ierr);
    for (i = 0; i < nleaves; i++) {
      const PetscInt leaf = ilocal ? ilocal[i] : i; /* ilocal == NULL means contiguous leaves 0..nleaves-1 */
      ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
    }
    ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
    PetscCheckFalse(n != nleaves,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
    ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

   Input Parameters:
+  sfA - The first PetscSF
-  sfB - The second PetscSF

   Output Parameters:
.  sfBA - The composite SF

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e. the same leaf is not connected with different roots.

   sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
   Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  /* Composition is only well-defined for true star forests (unique leaves); checked in debug builds */
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  if (localPointsA) {
    /* Scatter sfA's remote roots into sfB's root index space: slot i holds the (rank,index) of the
       sfA root reachable from sfB root i, or (-1,-1) when sfA has no leaf there */
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* sfA leaves outside sfB's root space cannot connect */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  /* Push sfA's root identities through sfB; leafdataB is offset by minleaf so sfB's
     leaf indices [minleaf,maxleaf] address it directly */
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* denseB: sfB's leaves are contiguous and every one of them received a connected root,
     so leafdataB can be used verbatim as the composite's remote points */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership of leafdataB transfers to the new SF below */
  } else {
    /* Compact: keep only leaves that have a path back to an sfA root */
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

   Input Parameters:
+  sfA - The first PetscSF
-  sfB - The second PetscSF

   Output Parameters:
.  sfBA - The composite SF.

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e. the same leaf is not connected with different roots.
Even more, all roots of the
   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
   a Reduce on sfB, on connected roots.

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op            op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool         iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr);
  if (iswin) op = MPI_REPLACE;
#endif

  /* Lay sfA's remote roots out over sfB's leaf range [minleaf,maxleaf]; slots with no sfA leaf stay (-1,-1) */
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* ilocal == NULL for sfA means leaf i lives at index i */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Reduce through sfB: each sfB root receives the sfA root its (single) leaf points at */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* Compact in place: keep only sfB roots that are connected to an sfA root */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
. out - The local PetscSF
 */
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm           comm;
  PetscMPIInt        myrank;
  const PetscInt     *ilocal;
  const PetscSFNode  *iremote;
  PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode        *liremote;
  PetscSF            lsf;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->CreateLocalSF) {
    /* Implementation provides a specialized (presumably faster) local-SF construction */
    ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
  } else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&myrank);CHKERRMPI(ierr);

    /* Find out local edges and build a local SF */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
    /* First pass: count edges whose remote root lives on this rank */
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);

    /* Second pass: copy the intra-process edges, re-targeting them to rank 0 of PETSC_COMM_SELF */
    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(lsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
    *out = lsf;
  }
  PetscFunctionReturn(0);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  /* BcastToZero is optional in the ops table; fail clearly when the SF type lacks it */
  if (sf->ops->BcastToZero) {
    ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFConcatenate - concatenate multiple SFs into one

   Input Parameters:
+  comm - the communicator
.  nsfs - the number of input PetscSF
.  sfs - the array of input PetscSF
.  shareRoots - the flag whether roots of input PetscSFs are taken as shared (PETSC_TRUE), or separate and concatenated (PETSC_FALSE)
-  leafOffsets - the array of local leaf offsets, one for each input PetscSF, or NULL for contiguous storage

   Output Parameters:
.
newsf - The resulting PetscSF

   Level: developer

   Notes:
   The communicator of all SFs in sfs must be comm.

   The offsets in leafOffsets are added to the original leaf indices.

   If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well.
   In this case, NULL is also passed as ilocal to the resulting SF.

   If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs.
   In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs).

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscBool shareRoots, PetscInt leafOffsets[], PetscSF *newsf)
{
  PetscInt       i, s, nLeaves, nRoots;
  PetscInt       *leafArrayOffsets;
  PetscInt       *ilocal_new;
  PetscSFNode    *iremote_new;
  PetscInt       *rootOffsets;
  PetscBool      all_ilocal_null = PETSC_FALSE;
  PetscMPIInt    rank;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  {
    PetscSF dummy; /* just to have a PetscObject on comm for input validation */

    ierr = PetscSFCreate(comm,&dummy);CHKERRQ(ierr);
    PetscValidLogicalCollectiveInt(dummy,nsfs,2);
    PetscValidPointer(sfs,3);
    for (i=0; i<nsfs; i++) {
      PetscValidHeaderSpecific(sfs[i],PETSCSF_CLASSID,3);
      PetscCheckSameComm(dummy,1,sfs[i],3);
    }
    PetscValidLogicalCollectiveBool(dummy,shareRoots,4);
    if (leafOffsets) PetscValidIntPointer(leafOffsets,5);
    PetscValidPointer(newsf,6);
    ierr = PetscSFDestroy(&dummy);CHKERRQ(ierr);
  }
  /* Empty input: return an SF with an empty graph */
  if (!nsfs) {
    ierr = PetscSFCreate(comm, newsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);

  /* rootOffsets[s] is the root-index shift applied to sfs[s]; all zero when roots are shared */
  ierr = PetscCalloc1(nsfs+1, &rootOffsets);CHKERRQ(ierr);
  if (shareRoots) {
    ierr = PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL);CHKERRQ(ierr);
    if (PetscDefined(USE_DEBUG)) {
      /* shared roots require every input SF to agree on the local root count */
      for (s=1; s<nsfs; s++) {
        PetscInt nr;

        ierr = PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL);CHKERRQ(ierr);
        PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "shareRoots = PETSC_TRUE but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", s, nr, nRoots);
      }
    }
  } else {
    for (s=0; s<nsfs; s++) {
      PetscInt nr;

      ierr = PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL);CHKERRQ(ierr);
      rootOffsets[s+1] = rootOffsets[s] + nr;
    }
    nRoots = rootOffsets[nsfs];
  }

  /* Calculate leaf array offsets and automatic root offsets */
  ierr = PetscMalloc1(nsfs+1,&leafArrayOffsets);CHKERRQ(ierr);
  leafArrayOffsets[0] = 0;
  for (s=0; s<nsfs; s++) {
    PetscInt nl;

    ierr = PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL);CHKERRQ(ierr);
    leafArrayOffsets[s+1] = leafArrayOffsets[s] + nl;
  }
  nLeaves = leafArrayOffsets[nsfs];

  /* leafOffsets == NULL is only legal when every input SF has contiguous leaves (ilocal == NULL) */
  if (!leafOffsets) {
    all_ilocal_null = PETSC_TRUE;
    for (s=0; s<nsfs; s++) {
      const PetscInt *ilocal;

      ierr = PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL);CHKERRQ(ierr);
      if (ilocal) {
        all_ilocal_null = PETSC_FALSE;
        break;
      }
    }
    PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
  }

  /* Renumber and concatenate local leaves */
  ilocal_new = NULL;
  if (!all_ilocal_null) {
    ierr = PetscMalloc1(nLeaves, &ilocal_new);CHKERRQ(ierr);
    for (i = 0; i<nLeaves; i++) ilocal_new[i] = -1;
    for (s = 0; s<nsfs; s++) {
      const PetscInt *ilocal;
      PetscInt       *ilocal_l = &ilocal_new[leafArrayOffsets[s]];
      PetscInt        i, nleaves_l;

      ierr = PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL);CHKERRQ(ierr);
      /* shift each input SF's leaf indices by the user-provided offset */
      for (i=0; i<nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
    }
  }

  /* Renumber and concatenate remote roots */
  ierr = PetscMalloc1(nLeaves, &iremote_new);CHKERRQ(ierr);
  for (i = 0; i < nLeaves; i++) {
    iremote_new[i].rank  = -1;
    iremote_new[i].index = -1;
  }
  for (s = 0; s<nsfs; s++) {
    PetscInt          i, nl, nr;
    PetscSF           tmp_sf;
    const PetscSFNode *iremote;
    PetscSFNode       *tmp_rootdata;
    PetscSFNode       *tmp_leafdata = &iremote_new[leafArrayOffsets[s]];

    ierr = PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote);CHKERRQ(ierr);
    ierr = PetscSFCreate(comm, &tmp_sf);CHKERRQ(ierr);
    /* create helper SF with contiguous leaves */
    ierr = PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode*) iremote, PETSC_COPY_VALUES);CHKERRQ(ierr);
    ierr = PetscSFSetUp(tmp_sf);CHKERRQ(ierr);
    /* broadcast each root's (rank, shifted index) to its leaves; the result lands directly in iremote_new */
    ierr = PetscMalloc1(nr, &tmp_rootdata);CHKERRQ(ierr);
    for (i = 0; i < nr; i++) {
      tmp_rootdata[i].index = i + rootOffsets[s];
      tmp_rootdata[i].rank  = (PetscInt) rank;
    }
    ierr = PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(  tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&tmp_sf);CHKERRQ(ierr);
    ierr = PetscFree(tmp_rootdata);CHKERRQ(ierr);
  }

  /* Build the new SF */
  ierr = PetscSFCreate(comm, newsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetUp(*newsf);CHKERRQ(ierr);
  ierr = PetscFree(rootOffsets);CHKERRQ(ierr);
  ierr = PetscFree(leafArrayOffsets);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}