#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/hashseti.h>
#include <petsc/private/viewerimpl.h>
#include <petscctable.h>

#if defined(PETSC_HAVE_CUDA)
#include <cuda_runtime.h>
#endif

#if defined(PETSC_HAVE_HIP)
#include <hip/hip_runtime.h>
#endif

/* In debug builds, error out if the SF graph has not been set before use; compiles away otherwise */
#if defined(PETSC_USE_DEBUG)
#  define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                              \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif

const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};

/* Classify a pointer as host or device memory by querying the device runtime(s).
   Defaults to PETSC_MEMTYPE_HOST when no runtime is initialized or no runtime claims the pointer. */
PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *type)
{
  PetscFunctionBegin;
  PetscValidPointer(type,2);
  *type = PETSC_MEMTYPE_HOST;
#if defined(PETSC_HAVE_CUDA)
  if (PetscDeviceInitialized(PETSC_DEVICE_CUDA) && data) {
    cudaError_t                  cerr;
    struct cudaPointerAttributes attr;
    enum cudaMemoryType          mtype;
    cerr = cudaPointerGetAttributes(&attr,data); /* Do not check error since before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */
    cudaGetLastError(); /* Reset the last error */
#if (CUDART_VERSION < 10000)
    mtype = attr.memoryType; /* field renamed to 'type' in CUDA 10 */
#else
    mtype = attr.type;
#endif
    if (cerr == cudaSuccess && mtype == cudaMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
  }
#endif

#if defined(PETSC_HAVE_HIP)
  if (PetscDeviceInitialized(PETSC_DEVICE_HIP) && data) {
    hipError_t                   cerr;
    struct hipPointerAttribute_t attr;
    enum hipMemoryType           mtype;
    cerr = hipPointerGetAttributes(&attr,data);
    hipGetLastError(); /* Reset the last error */
    mtype = attr.memoryType;
    if (cerr == hipSuccess && mtype == hipMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
  }
#endif
  PetscFunctionReturn(0);
}

/*@
   PetscSFCreate - create a star forest communication context

   Collective

   Input Parameter:
.  comm - communicator on which the star forest will operate

   Output Parameter:
.  sf - new star forest context

   Options Database Keys:
+  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
.  -sf_type window    -Use MPI-3 one-sided window for communication
-  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication

   Level: intermediate

   Notes:
   When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
   MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
   SFs are optimized and they have better performance than general SFs.

.seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
{
  PetscErrorCode ierr;
  PetscSF        b;

  PetscFunctionBegin;
  PetscValidPointer(sf,2);
  ierr = PetscSFInitializePackage();CHKERRQ(ierr);

  ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);

  /* Sentinel values: negative counts mean "graph not set yet"; leaf range inverted so first real leaf updates it */
  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_MAX_INT;
  b->maxleaf   = PETSC_MIN_INT;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;
#if defined(PETSC_HAVE_DEVICE)
  b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
  b->use_stream_aware_mpi = PETSC_FALSE;
  b->unknown_input_stream = PETSC_FALSE;
#if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
  b->backend = PETSCSF_BACKEND_KOKKOS;
#elif defined(PETSC_HAVE_CUDA)
  b->backend = PETSCSF_BACKEND_CUDA;
#elif defined(PETSC_HAVE_HIP)
  b->backend = PETSCSF_BACKEND_HIP;
#endif

#if defined(PETSC_HAVE_NVSHMEM)
  b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
  b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
  ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL);CHKERRQ(ierr);
#endif
#endif
  b->vscat.from_n = -1;
  b->vscat.to_n   = -1;
  b->vscat.unit   = MPIU_SCALAR;
  *sf = b;
  PetscFunctionReturn(0);
}

/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Parameter:
.  sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the implementation release its own data first */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  /* Return the graph-describing fields to their PetscSFCreate() sentinel state */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRMPI(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRMPI(ierr);}
  /* Break the back-pointer before destroying the multi-SF to avoid a dangling reference */
  if (sf->multi) sf->multi->multi = NULL;
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);

#if defined(PETSC_HAVE_DEVICE)
  for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);CHKERRQ(ierr);}
#endif

  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFSetType - Set the PetscSF communication implementation

   Collective on PetscSF

   Input Parameters:
+  sf - the PetscSF context
-  type - a known method

   Options Database Key:
.  -sf_type <type> - Sets the method; use -help for a list
   of available methods (for instance, window, basic, neighbor)

   Notes:
   See "include/petscsf.h" for available methods (for instance)
+    PETSCSFWINDOW - MPI-2/3 one-sided
-    PETSCSFBASIC - basic implementation using MPI-1 two-sided

   Level: intermediate

.seealso: PetscSFType, PetscSFCreate()
@*/
PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
{
  PetscErrorCode ierr,(*r)(PetscSF);
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidCharPointer(type,2);

  /* No-op if the requested type is already set */
  ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
  if (match) PetscFunctionReturn(0);

  ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
  if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
  /* Destroy the previous PetscSF implementation context */
  if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
  ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
  ierr = (*r)(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetType - Get the PetscSF communication implementation

   Not Collective

   Input Parameter:
.  sf - the PetscSF context

   Output Parameter:
.
type - the PetscSF type name 231 232 Level: intermediate 233 234 .seealso: PetscSFSetType(), PetscSFCreate() 235 @*/ 236 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 237 { 238 PetscFunctionBegin; 239 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1); 240 PetscValidPointer(type,2); 241 *type = ((PetscObject)sf)->type_name; 242 PetscFunctionReturn(0); 243 } 244 245 /*@C 246 PetscSFDestroy - destroy star forest 247 248 Collective 249 250 Input Parameter: 251 . sf - address of star forest 252 253 Level: intermediate 254 255 .seealso: PetscSFCreate(), PetscSFReset() 256 @*/ 257 PetscErrorCode PetscSFDestroy(PetscSF *sf) 258 { 259 PetscErrorCode ierr; 260 261 PetscFunctionBegin; 262 if (!*sf) PetscFunctionReturn(0); 263 PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1); 264 if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);} 265 ierr = PetscSFReset(*sf);CHKERRQ(ierr); 266 if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);} 267 ierr = PetscSFDestroy(&(*sf)->vscat.lsf);CHKERRQ(ierr); 268 if ((*sf)->vscat.bs > 1) {ierr = MPI_Type_free(&(*sf)->vscat.unit);CHKERRMPI(ierr);} 269 ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr); 270 PetscFunctionReturn(0); 271 } 272 273 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 274 { 275 PetscInt i, nleaves; 276 PetscMPIInt size; 277 const PetscInt *ilocal; 278 const PetscSFNode *iremote; 279 PetscErrorCode ierr; 280 281 PetscFunctionBegin; 282 if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0); 283 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 284 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr); 285 for (i = 0; i < nleaves; i++) { 286 const PetscInt rank = iremote[i].rank; 287 const PetscInt remote = iremote[i].index; 288 const PetscInt leaf = ilocal ? 
ilocal[i] : i; 289 if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size); 290 if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i); 291 if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i); 292 } 293 PetscFunctionReturn(0); 294 } 295 296 /*@ 297 PetscSFSetUp - set up communication structures 298 299 Collective 300 301 Input Parameter: 302 . sf - star forest communication object 303 304 Level: beginner 305 306 .seealso: PetscSFSetFromOptions(), PetscSFSetType() 307 @*/ 308 PetscErrorCode PetscSFSetUp(PetscSF sf) 309 { 310 PetscErrorCode ierr; 311 312 PetscFunctionBegin; 313 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 314 PetscSFCheckGraphSet(sf,1); 315 if (sf->setupcalled) PetscFunctionReturn(0); 316 ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 317 ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr); 318 if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} /* Zero all sf->ops */ 319 if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);} 320 #if defined(PETSC_HAVE_CUDA) 321 if (sf->backend == PETSCSF_BACKEND_CUDA) { 322 sf->ops->Malloc = PetscSFMalloc_CUDA; 323 sf->ops->Free = PetscSFFree_CUDA; 324 } 325 #endif 326 #if defined(PETSC_HAVE_HIP) 327 if (sf->backend == PETSCSF_BACKEND_HIP) { 328 sf->ops->Malloc = PetscSFMalloc_HIP; 329 sf->ops->Free = PetscSFFree_HIP; 330 } 331 #endif 332 333 # 334 #if defined(PETSC_HAVE_KOKKOS) 335 if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 336 sf->ops->Malloc = PetscSFMalloc_Kokkos; 337 sf->ops->Free = PetscSFFree_Kokkos; 338 } 339 #endif 340 ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 341 sf->setupcalled = PETSC_TRUE; 342 PetscFunctionReturn(0); 343 } 344 345 /*@ 346 
   PetscSFSetFromOptions - set PetscSF options using the options database

   Logically Collective

   Input Parameter:
.  sf - star forest

   Options Database Keys:
+  -sf_type                 - implementation type, see PetscSFSetType()
.  -sf_rank_order           - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
.  -sf_unknown_input_stream - Assume the SF root/leafdata is computed on arbitrary streams unknown to SF (default: false)
.  -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
                              If true, this option only works with -use_gpu_aware_mpi 1.

-  -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos.
                              On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
                              the only available is kokkos.

   Level: intermediate
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  {
    char      backendstr[32] = {0};
    PetscBool isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
    /* Change the defaults set in PetscSFCreate() with command line options */
    ierr = PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL);CHKERRQ(ierr);
    ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr);
    ierr = PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);CHKERRQ(ierr);
    /* Case-insensitive match of the requested backend name */
    ierr = PetscStrcasecmp("cuda",backendstr,&isCuda);CHKERRQ(ierr);
    ierr = PetscStrcasecmp("kokkos",backendstr,&isKokkos);CHKERRQ(ierr);
    ierr = PetscStrcasecmp("hip",backendstr,&isHip);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
    if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
    else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
    else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
    else if (set) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
#elif defined(PETSC_HAVE_KOKKOS)
    /* Kokkos-only builds accept only the kokkos backend */
    if (set && !isKokkos) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr);
#endif
  }
#endif
  if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

   Logically Collective

   Input Parameters:
+  sf - star forest
-  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

   Level: advanced

.seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  /* The multi-SF is built using the ordering, so it must not exist yet */
  if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Parameters:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
   during setup in debug mode)
.  localmode - copy mode for ilocal
.
iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
   during setup in debug mode)
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
   In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode

   Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
   encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
   needed

   Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
   indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).

.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
  if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    ierr = PetscSFReset(sf);CHKERRQ(ierr);
  }

  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  if (nleaves && ilocal) {
    /* Scan leaves to get the leaf range, and detect the identity permutation
       (contiguous storage), which is encoded by a NULL ilocal */
    PetscInt i;
    PetscInt minleaf = PETSC_MAX_INT;
    PetscInt maxleaf = PETSC_MIN_INT;
    int      contiguous = 1;
    for (i=0; i<nleaves; i++) {
      minleaf = PetscMin(minleaf,ilocal[i]);
      maxleaf = PetscMax(maxleaf,ilocal[i]);
      contiguous &= (ilocal[i] == i);
    }
    sf->minleaf = minleaf;
    sf->maxleaf = maxleaf;
    if (contiguous) {
      if (localmode == PETSC_OWN_POINTER) {
        /* We own it and do not need it: free it (see Developers Note above) */
        ierr = PetscFree(ilocal);CHKERRQ(ierr);
      }
      ilocal = NULL;
    }
  } else {
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
  }

  if (ilocal) {
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
      sf->mine = sf->mine_alloc;
      break;
    case PETSC_OWN_POINTER:
      /* Take ownership; freed in PetscSFReset() */
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      /* Borrow; caller keeps ownership, so mine_alloc stays NULL */
      sf->mine_alloc = NULL;
      sf->mine       = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }

  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    sf->remote_alloc = NULL;
    sf->remote       = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

   Collective

   Input Parameters:
+  sf - The PetscSF
.
map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
-  pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

   Notes:
   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
   n and N are local and global sizes of x respectively.

   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
   sequential vectors y on all ranks.

   With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
   sequential vector y on rank 0.

   In above cases, entries of x are roots and entries of y are leaves.

   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
   creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
   of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
   not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data
   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

   In this case, roots and leaves are symmetric.

   Level: intermediate
@*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* Build an internal layout with 'size' roots per rank (the user's map is ignored for alltoall) */
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRMPI(ierr);
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots  = n;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves in a gather */
    sf->nleaves = rank ? 0 : N;
    sf->nroots  = n;
    sf->nranks  = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves = size;
    sf->nroots  = size;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

   Collective

   Input Parameter:
.  sf - star forest to invert

   Output Parameter:
.  isf - inverse of sf

   Level: advanced

   Notes:
   All roots must have degree 1.

   The local space may be a permutation, but cannot be sparse.

.seealso: PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  /* Each leaf advertises its own (rank,index); reducing this onto the roots
     tells each root who its (unique, degree-1) leaf is */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* Roots with no incoming leaf keep the (-1,-1) marker */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL;
  else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves).
        */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  /* newilocal ownership transfers to the new SF (PETSC_OWN_POINTER); roots is copied and freed below */
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Parameters:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Parameter:
.  newsf - new communication object

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;
  MPI_Datatype   dtype=MPIU_SCALAR;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      /* Patterned graphs are reproduced from the stored layout and pattern */
      ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
  /* Since oldtype is committed, so is newtype, according to MPI */
  if (sf->vscat.bs > 1) {ierr = MPI_Type_dup(sf->vscat.unit,&dtype);CHKERRMPI(ierr);}
  (*newsf)->vscat.bs     = sf->vscat.bs;
  (*newsf)->vscat.unit   = dtype;
  (*newsf)->vscat.to_n   = sf->vscat.to_n;
  (*newsf)->vscat.from_n = sf->vscat.from_n;
  /* Do not copy lsf. Build it on demand since it is rarely used */

#if defined(PETSC_HAVE_DEVICE)
  (*newsf)->backend              = sf->backend;
  (*newsf)->unknown_input_stream = sf->unknown_input_stream;
  (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGraph - Get the graph specifying a parallel star forest

   Not Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
-  iremote - remote locations of root vertices for each leaf on the current process

   Notes:
   We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet

   Fortran Notes:
   The returned iremote array is a copy and must be deallocated after use. Consequently, if you
   want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.

   To check for a NULL ilocal use
$      if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->GetGraph) {
    /* Implementations (e.g. patterned SFs) may synthesize the graph on demand */
    ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
  } else {
    if (nroots) *nroots = sf->nroots;
    if (nleaves) *nleaves = sf->nleaves;
    if (ilocal) *ilocal = sf->mine;
    if (iremote) *iremote = sf->remote;
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetLeafRange - Get the active leaf ranges

   Not Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
-  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.

   Level: developer

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFViewFromOptions - View from Options

   Collective on PetscSF

   Input Parameters:
+  A - the star forest
.  obj - Optional object
-  name - command line option

   Level: intermediate

.seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
  ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFView - view a star forest

   Collective

   Input Parameters:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        /* Print the per-rank edge lists in sorted rank order; sort a copy to leave sf->ranks untouched */
        PetscMPIInt *tmpranks,*perm;
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Parameter:
.
sf - star forest

   Output Parameters:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
.  roffset - offset in rmine/rremote for each rank (length nranks+1)
.  rmine - concatenated array holding local indices referencing each remote rank
-  rremote - concatenated array holding remote indices referenced for each remote rank

   Level: developer

.seealso: PetscSFGetLeafRanks()
@*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* These arrays are built during setup (see PetscSFSetUpRanks()) */
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetRootRanks) {
    ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
  } else {
    /* The generic implementation: return the cached arrays; any output may be NULL */
    if (nranks) *nranks = sf->nranks;
    if (ranks) *ranks = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine) *rmine = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

   Not Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  niranks - number of leaf ranks referencing roots on this process
.  iranks - array of ranks
.
ioffset - offset in irootloc for each rank (length niranks+1)
-  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Leaf-rank data is built during setup by implementations that support it */
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetLeafRanks) {
    ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
  } else {
    /* No generic fallback: this information only exists for implementations that provide the op */
    PetscSFType type;
    ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
  }
  PetscFunctionReturn(0);
}

/* Return PETSC_TRUE if needle occurs in list[0..n-1]; linear scan of an unsorted list */
static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
  PetscInt i;
  for (i=0; i<n; i++) {
    if (needle == list[i]) return PETSC_TRUE;
  }
  return PETSC_FALSE;
}

/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

   Collective

   Input Parameters:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode ierr;
  PetscTable table;
  PetscTablePosition pos;
  PetscMPIInt size,groupsize,*groupranks;
  PetscInt *rcount,*ranks;
  PetscInt i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr);
  /* Count how many leaves reference each remote rank, using a hash table */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--; /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate members of dgroup into ranks of sf's communicator */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRMPI(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRMPI(ierr);}
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank = ranks[i];
      tmpcount = rcount[i];
      ranks[i] = ranks[sf->ndranks];
      rcount[i] = rcount[sf->ndranks];
      ranks[sf->ndranks] = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition separately so each is in increasing rank order */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* Build CSR-style offsets; rcount[] is reset and reused below as a fill counter */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i] = 0;
  }
  /* Bucket each leaf by its remote rank into rmine[]/rremote[] */
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt i;
    const PetscInt *indegree;
    PetscMPIInt rank,*outranks,*inranks;
    PetscSFNode *remote;
    PetscSF bgcount;

    /* Compute the number of incoming ranks */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank = sf->ranks[i];
      remote[i].index = 0;
    }
    /* Auxiliary SF with one root per process; its root degree is the number of ranks referencing me */
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
    /* Enumerate the incoming ranks */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRMPI(ierr);
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* The outgoing group is simply the set of root ranks this process references */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRMPI(ierr);
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Parameter:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Parameter:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.
1205 1206 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering() 1207 @*/ 1208 PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi) 1209 { 1210 PetscErrorCode ierr; 1211 1212 PetscFunctionBegin; 1213 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1214 PetscValidPointer(multi,2); 1215 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */ 1216 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1217 *multi = sf->multi; 1218 sf->multi->multi = sf->multi; 1219 PetscFunctionReturn(0); 1220 } 1221 if (!sf->multi) { 1222 const PetscInt *indegree; 1223 PetscInt i,*inoffset,*outones,*outoffset,maxlocal; 1224 PetscSFNode *remote; 1225 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 1226 ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr); 1227 ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr); 1228 ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr); 1229 inoffset[0] = 0; 1230 for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i]; 1231 for (i=0; i<maxlocal; i++) outones[i] = 1; 1232 ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1233 ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1234 for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1235 if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */ 1236 for (i=0; i<sf->nroots; i++) { 1237 if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp"); 1238 } 1239 } 1240 ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr); 1241 for (i=0; i<sf->nleaves; i++) { 1242 remote[i].rank = sf->remote[i].rank; 1243 remote[i].index = outoffset[sf->mine ? 
sf->mine[i] : i]; 1244 } 1245 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1246 sf->multi->multi = sf->multi; 1247 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1248 if (sf->rankorder) { /* Sort the ranks */ 1249 PetscMPIInt rank; 1250 PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree; 1251 PetscSFNode *newremote; 1252 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 1253 for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]); 1254 ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr); 1255 for (i=0; i<maxlocal; i++) outranks[i] = rank; 1256 ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr); 1257 ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr); 1258 /* Sort the incoming ranks at each vertex, build the inverse map */ 1259 for (i=0; i<sf->nroots; i++) { 1260 PetscInt j; 1261 for (j=0; j<indegree[i]; j++) tmpoffset[j] = j; 1262 ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr); 1263 for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1264 } 1265 ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr); 1266 ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr); 1267 ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr); 1268 for (i=0; i<sf->nleaves; i++) { 1269 newremote[i].rank = sf->remote[i].rank; 1270 newremote[i].index = newoutoffset[sf->mine ? 
sf->mine[i] : i]; 1271 } 1272 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1273 ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr); 1274 } 1275 ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr); 1276 } 1277 *multi = sf->multi; 1278 PetscFunctionReturn(0); 1279 } 1280 1281 /*@C 1282 PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices 1283 1284 Collective 1285 1286 Input Parameters: 1287 + sf - original star forest 1288 . nselected - number of selected roots on this process 1289 - selected - indices of the selected roots on this process 1290 1291 Output Parameter: 1292 . esf - new star forest 1293 1294 Level: advanced 1295 1296 Note: 1297 To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can 1298 be done by calling PetscSFGetGraph(). 1299 1300 .seealso: PetscSFSetGraph(), PetscSFGetGraph() 1301 @*/ 1302 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf) 1303 { 1304 PetscInt i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal; 1305 const PetscInt *ilocal; 1306 signed char *rootdata,*leafdata,*leafmem; 1307 const PetscSFNode *iremote; 1308 PetscSFNode *new_iremote; 1309 MPI_Comm comm; 1310 PetscErrorCode ierr; 1311 1312 PetscFunctionBegin; 1313 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1314 PetscSFCheckGraphSet(sf,1); 1315 if (nselected) PetscValidPointer(selected,3); 1316 PetscValidPointer(esf,4); 1317 1318 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1319 ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1320 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1321 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 1322 1323 if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or 
out of range indices */ 1324 PetscBool dups; 1325 ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr); 1326 if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups"); 1327 for (i=0; i<nselected; i++) 1328 if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %D is out of [0,%D)",selected[i],nroots); 1329 } 1330 1331 if (sf->ops->CreateEmbeddedRootSF) { 1332 ierr = (*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf);CHKERRQ(ierr); 1333 } else { 1334 /* A generic version of creating embedded sf */ 1335 ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 1336 maxlocal = maxleaf - minleaf + 1; 1337 ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr); 1338 leafdata = leafmem - minleaf; 1339 /* Tag selected roots and bcast to leaves */ 1340 for (i=0; i<nselected; i++) rootdata[selected[i]] = 1; 1341 ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 1342 ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 1343 1344 /* Build esf with leaves that are still connected */ 1345 esf_nleaves = 0; 1346 for (i=0; i<nleaves; i++) { 1347 j = ilocal ? ilocal[i] : i; 1348 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1349 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1350 */ 1351 esf_nleaves += (leafdata[j] ? 1 : 0); 1352 } 1353 ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr); 1354 ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr); 1355 for (i=n=0; i<nleaves; i++) { 1356 j = ilocal ? 
ilocal[i] : i; 1357 if (leafdata[j]) { 1358 new_ilocal[n] = j; 1359 new_iremote[n].rank = iremote[i].rank; 1360 new_iremote[n].index = iremote[i].index; 1361 ++n; 1362 } 1363 } 1364 ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr); 1365 ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr); 1366 ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1367 ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr); 1368 } 1369 ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1370 PetscFunctionReturn(0); 1371 } 1372 1373 /*@C 1374 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1375 1376 Collective 1377 1378 Input Parameters: 1379 + sf - original star forest 1380 . nselected - number of selected leaves on this process 1381 - selected - indices of the selected leaves on this process 1382 1383 Output Parameter: 1384 . newsf - new star forest 1385 1386 Level: advanced 1387 1388 .seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph() 1389 @*/ 1390 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1391 { 1392 const PetscSFNode *iremote; 1393 PetscSFNode *new_iremote; 1394 const PetscInt *ilocal; 1395 PetscInt i,nroots,*leaves,*new_ilocal; 1396 MPI_Comm comm; 1397 PetscErrorCode ierr; 1398 1399 PetscFunctionBegin; 1400 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1401 PetscSFCheckGraphSet(sf,1); 1402 if (nselected) PetscValidPointer(selected,3); 1403 PetscValidPointer(newsf,4); 1404 1405 /* Uniq selected[] and put results in leaves[] */ 1406 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1407 ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr); 1408 ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr); 1409 ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr); 1410 if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= 
sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves); 1411 1412 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1413 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) { 1414 ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr); 1415 } else { 1416 ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr); 1417 ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr); 1418 ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr); 1419 for (i=0; i<nselected; ++i) { 1420 const PetscInt l = leaves[i]; 1421 new_ilocal[i] = ilocal ? ilocal[l] : l; 1422 new_iremote[i].rank = iremote[l].rank; 1423 new_iremote[i].index = iremote[l].index; 1424 } 1425 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr); 1426 ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1427 } 1428 ierr = PetscFree(leaves);CHKERRQ(ierr); 1429 PetscFunctionReturn(0); 1430 } 1431 1432 /*@C 1433 PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd() 1434 1435 Collective on PetscSF 1436 1437 Input Parameters: 1438 + sf - star forest on which to communicate 1439 . unit - data type associated with each node 1440 . rootdata - buffer to broadcast 1441 - op - operation to use for reduction 1442 1443 Output Parameter: 1444 . leafdata - buffer to be reduced with values from each leaf's respective root 1445 1446 Level: intermediate 1447 1448 Notes: 1449 When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1450 are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1451 use PetscSFBcastWithMemTypeBegin() instead. 
1452 .seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin() 1453 @*/ 1454 PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1455 { 1456 PetscErrorCode ierr; 1457 PetscMemType rootmtype,leafmtype; 1458 1459 PetscFunctionBegin; 1460 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1461 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1462 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);} 1463 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1464 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1465 ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 1466 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);} 1467 PetscFunctionReturn(0); 1468 } 1469 1470 /*@C 1471 PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd() 1472 1473 Collective on PetscSF 1474 1475 Input Parameters: 1476 + sf - star forest on which to communicate 1477 . unit - data type associated with each node 1478 . rootmtype - memory type of rootdata 1479 . rootdata - buffer to broadcast 1480 . leafmtype - memory type of leafdata 1481 - op - operation to use for reduction 1482 1483 Output Parameter: 1484 . 
leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFBcastEnd(), PetscSFBcastBegin()
@*/
PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Logging is suppressed when called from VecScatter, which does its own logging */
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
  /* Caller supplies the memory types, avoiding the pointer-attribute queries of PetscSFBcastBegin() */
  ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Parameter:
.
leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFReduceEnd()
@*/
PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Logging is suppressed when called from VecScatter, which does its own logging */
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);}
  ierr = (*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Parameter:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

   Notes:
   When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
   are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should
   use PetscSFReduceWithMemTypeBegin() instead.
1553 1554 .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin() 1555 @*/ 1556 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1557 { 1558 PetscErrorCode ierr; 1559 PetscMemType rootmtype,leafmtype; 1560 1561 PetscFunctionBegin; 1562 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1563 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1564 if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);} 1565 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1566 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1567 ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr); 1568 if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);} 1569 PetscFunctionReturn(0); 1570 } 1571 1572 /*@C 1573 PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd() 1574 1575 Collective 1576 1577 Input Parameters: 1578 + sf - star forest 1579 . unit - data type 1580 . leafmtype - memory type of leafdata 1581 . leafdata - values to reduce 1582 . rootmtype - memory type of rootdata 1583 - op - reduction operation 1584 1585 Output Parameter: 1586 . 
rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
@*/
PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Logging is suppressed when called from VecScatter, which does its own logging */
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
  /* Caller supplies the memory types, avoiding the pointer-attribute queries of PetscSFReduceBegin() */
  ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Parameter:
.
rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Logging is suppressed when called from VecScatter, which does its own logging */
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);}
  ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
   integers.
1657 1658 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph() 1659 @*/ 1660 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1661 { 1662 PetscErrorCode ierr; 1663 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1664 1665 PetscFunctionBegin; 1666 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1667 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1668 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1669 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1670 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1671 ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr); 1672 if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1673 ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr); 1674 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1675 PetscFunctionReturn(0); 1676 } 1677 1678 /*@C 1679 PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value 1680 1681 Collective 1682 1683 Input Parameters: 1684 + sf - star forest 1685 . unit - data type 1686 . 
leafdata - leaf values to use in reduction 1687 - op - operation to use for reduction 1688 1689 Output Parameters: 1690 + rootdata - root values to be updated, input state is seen by first process to perform an update 1691 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1692 1693 Level: advanced 1694 1695 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph() 1696 @*/ 1697 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1698 { 1699 PetscErrorCode ierr; 1700 1701 PetscFunctionBegin; 1702 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1703 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1704 ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr); 1705 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1706 PetscFunctionReturn(0); 1707 } 1708 1709 /*@C 1710 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd() 1711 1712 Collective 1713 1714 Input Parameter: 1715 . sf - star forest 1716 1717 Output Parameter: 1718 . degree - degree of each root vertex 1719 1720 Level: advanced 1721 1722 Notes: 1723 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 
1724 1725 .seealso: PetscSFGatherBegin() 1726 @*/ 1727 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree) 1728 { 1729 PetscErrorCode ierr; 1730 1731 PetscFunctionBegin; 1732 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1733 PetscSFCheckGraphSet(sf,1); 1734 PetscValidPointer(degree,2); 1735 if (!sf->degreeknown) { 1736 PetscInt i, nroots = sf->nroots, maxlocal; 1737 if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1738 maxlocal = sf->maxleaf-sf->minleaf+1; 1739 ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr); 1740 ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1741 for (i=0; i<nroots; i++) sf->degree[i] = 0; 1742 for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1; 1743 ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr); 1744 } 1745 *degree = NULL; 1746 PetscFunctionReturn(0); 1747 } 1748 1749 /*@C 1750 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin() 1751 1752 Collective 1753 1754 Input Parameter: 1755 . sf - star forest 1756 1757 Output Parameter: 1758 . degree - degree of each root vertex 1759 1760 Level: developer 1761 1762 Notes: 1763 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 
1764 1765 .seealso: 1766 @*/ 1767 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree) 1768 { 1769 PetscErrorCode ierr; 1770 1771 PetscFunctionBegin; 1772 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1773 PetscSFCheckGraphSet(sf,1); 1774 PetscValidPointer(degree,2); 1775 if (!sf->degreeknown) { 1776 if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 1777 ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr); 1778 ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr); 1779 sf->degreeknown = PETSC_TRUE; 1780 } 1781 *degree = sf->degree; 1782 PetscFunctionReturn(0); 1783 } 1784 1785 /*@C 1786 PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()). 1787 Each multi-root is assigned index of the corresponding original root. 1788 1789 Collective 1790 1791 Input Parameters: 1792 + sf - star forest 1793 - degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd() 1794 1795 Output Parameters: 1796 + nMultiRoots - (optional) number of multi-roots (roots of multi-SF) 1797 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots 1798 1799 Level: developer 1800 1801 Notes: 1802 The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed. 
1803 1804 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF() 1805 @*/ 1806 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1807 { 1808 PetscSF msf; 1809 PetscInt i, j, k, nroots, nmroots; 1810 PetscErrorCode ierr; 1811 1812 PetscFunctionBegin; 1813 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1814 ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 1815 if (nroots) PetscValidIntPointer(degree,2); 1816 if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3); 1817 PetscValidPointer(multiRootsOrigNumbering,4); 1818 ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr); 1819 ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr); 1820 ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr); 1821 for (i=0,j=0,k=0; i<nroots; i++) { 1822 if (!degree[i]) continue; 1823 for (j=0; j<degree[i]; j++,k++) { 1824 (*multiRootsOrigNumbering)[k] = i; 1825 } 1826 } 1827 if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail"); 1828 if (nMultiRoots) *nMultiRoots = nmroots; 1829 PetscFunctionReturn(0); 1830 } 1831 1832 /*@C 1833 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 1834 1835 Collective 1836 1837 Input Parameters: 1838 + sf - star forest 1839 . unit - data type 1840 - leafdata - leaf data to gather to roots 1841 1842 Output Parameter: 1843 . 
multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1844 1845 Level: intermediate 1846 1847 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1848 @*/ 1849 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1850 { 1851 PetscErrorCode ierr; 1852 PetscSF multi = NULL; 1853 1854 PetscFunctionBegin; 1855 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1856 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1857 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1858 ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr); 1859 PetscFunctionReturn(0); 1860 } 1861 1862 /*@C 1863 PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin() 1864 1865 Collective 1866 1867 Input Parameters: 1868 + sf - star forest 1869 . unit - data type 1870 - leafdata - leaf data to gather to roots 1871 1872 Output Parameter: 1873 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1874 1875 Level: intermediate 1876 1877 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1878 @*/ 1879 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1880 { 1881 PetscErrorCode ierr; 1882 PetscSF multi = NULL; 1883 1884 PetscFunctionBegin; 1885 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1886 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1887 ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr); 1888 PetscFunctionReturn(0); 1889 } 1890 1891 /*@C 1892 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd() 1893 1894 Collective 1895 1896 Input Parameters: 1897 + sf - star forest 1898 . unit - data type 1899 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1900 1901 Output Parameter: 1902 . 
leafdata - leaf data to be update with personal data from each respective root 1903 1904 Level: intermediate 1905 1906 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1907 @*/ 1908 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1909 { 1910 PetscErrorCode ierr; 1911 PetscSF multi = NULL; 1912 1913 PetscFunctionBegin; 1914 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1915 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1916 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1917 ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 1918 PetscFunctionReturn(0); 1919 } 1920 1921 /*@C 1922 PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin() 1923 1924 Collective 1925 1926 Input Parameters: 1927 + sf - star forest 1928 . unit - data type 1929 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1930 1931 Output Parameter: 1932 . 
leafdata - leaf data to be update with personal data from each respective root 1933 1934 Level: intermediate 1935 1936 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1937 @*/ 1938 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1939 { 1940 PetscErrorCode ierr; 1941 PetscSF multi = NULL; 1942 1943 PetscFunctionBegin; 1944 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1945 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1946 ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 1947 PetscFunctionReturn(0); 1948 } 1949 1950 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 1951 { 1952 PetscInt i, n, nleaves; 1953 const PetscInt *ilocal = NULL; 1954 PetscHSetI seen; 1955 PetscErrorCode ierr; 1956 1957 PetscFunctionBegin; 1958 if (PetscDefined(USE_DEBUG)) { 1959 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 1960 ierr = PetscHSetICreate(&seen);CHKERRQ(ierr); 1961 for (i = 0; i < nleaves; i++) { 1962 const PetscInt leaf = ilocal ? ilocal[i] : i; 1963 ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr); 1964 } 1965 ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr); 1966 if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique"); 1967 ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr); 1968 } 1969 PetscFunctionReturn(0); 1970 } 1971 1972 /*@ 1973 PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view 1974 1975 Input Parameters: 1976 + sfA - The first PetscSF 1977 - sfB - The second PetscSF 1978 1979 Output Parameters: 1980 . sfBA - The composite SF 1981 1982 Level: developer 1983 1984 Notes: 1985 Currently, the two SFs must be defined on congruent communicators and they must be true star 1986 forests, i.e. the same leaf is not connected with different roots. 

   sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
   Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  /* Both inputs must be true star forests (unique leaves); checked only in debug builds */
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  if (localPointsA) {
    /* sfA's leaves are not contiguous: scatter its remote points into a dense array indexed by
       sfB's root space, with rank=-1 marking roots of sfB that no leaf of sfA maps onto */
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* leaf of sfA outside sfB's root space: no path */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  /* Broadcast sfA's root locations through sfB, so each leaf of sfB learns which root of sfA (if any) reaches it */
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* Count connected leaves; denseB stays true only if sfB's leaves are contiguous AND all are connected */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    /* All leaves connected and contiguous: hand leafdataB over to the new SF directly (no copy) */
    localPointsBA  = NULL;
    remotePointsBA = leafdataB;
  } else {
    /* Compact: keep only the connected leaves, recording their local indices */
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  /* PETSC_OWN_POINTER: the new SF takes ownership of localPointsBA and remotePointsBA (possibly leafdataB) */
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

   Input Parameters:
+  sfA - The first PetscSF
-  sfB - The second PetscSF

   Output Parameters:
.  sfBA - The composite SF.

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e. the same leaf is not connected with different roots.
 Even more, all roots of the
   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
   a Reduce on sfB, on connected roots.

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op            op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool         iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  /* Both inputs must be true star forests (unique leaves); checked only in debug builds */
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr);
  if (iswin) op = MPI_REPLACE;
#endif

  /* Lay sfA's remote points out densely over sfB's leaf range, rank=-1 marking leaves of sfB
     that are not also leaves of sfA (no path through them) */
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; /* outside sfB's leaf range */
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* localPointsA == NULL means sfA's leaves are contiguous 0..numLeavesA-1 */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;  /* roots of sfB not reached by any leaf keep rank -1 and are dropped below */
    remotePointsBA[i].index = -1;
  }

  /* Reduce sfA's root locations from sfB's leaves up to sfB's roots; with degree-1 roots this is
     effectively a replace, MPI_MAXLOC just keeps it deterministic otherwise */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* Compact in place: keep only the connected roots of sfB as leaves of the new SF */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  /* PETSC_OWN_POINTER: the new SF takes ownership of both arrays (they were allocated with length numRootsB >= numLeavesBA) */
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
. out - The local PetscSF
*/
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm           comm;
  PetscMPIInt        myrank;
  const PetscInt     *ilocal;
  const PetscSFNode  *iremote;
  PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode        *liremote;
  PetscSF            lsf;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->CreateLocalSF) {
    /* Let the implementation build the local SF if it knows how */
    ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
  } else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&myrank);CHKERRMPI(ierr);

    /* Find out local edges and build a local SF */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
    /* First pass: count edges whose root lives on this rank */
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);

    /* Second pass: copy the intra-process edges, remapping the remote rank to 0 (PETSC_COMM_SELF) */
    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(lsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
    *out = lsf;
  }
  PetscFunctionReturn(0);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Logged under the regular BcastBegin event since this is a restricted broadcast */
  ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  if (sf->ops->BcastToZero) {
    ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
