1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/hashseti.h> 3 #include <petscctable.h> 4 5 #if defined(PETSC_USE_DEBUG) 6 # define PetscSFCheckGraphSet(sf,arg) do { \ 7 if (PetscUnlikely(!(sf)->graphset)) \ 8 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \ 9 } while (0) 10 #else 11 # define PetscSFCheckGraphSet(sf,arg) do {} while (0) 12 #endif 13 14 const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0}; 15 16 /*@ 17 PetscSFCreate - create a star forest communication context 18 19 Collective 20 21 Input Arguments: 22 . comm - communicator on which the star forest will operate 23 24 Output Arguments: 25 . sf - new star forest context 26 27 Options Database Keys: 28 + -sf_type basic -Use MPI persistent Isend/Irecv for communication (Default) 29 . -sf_type window -Use MPI-3 one-sided window for communication 30 - -sf_type neighbor -Use MPI-3 neighborhood collectives for communication 31 32 Level: intermediate 33 34 Notes: 35 When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv, 36 MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special 37 SFs are optimized and they have better performance than general SFs. 38 39 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy() 40 @*/ 41 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf) 42 { 43 PetscErrorCode ierr; 44 PetscSF b; 45 46 PetscFunctionBegin; 47 PetscValidPointer(sf,2); 48 ierr = PetscSFInitializePackage();CHKERRQ(ierr); 49 50 ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr); 51 52 b->nroots = -1; 53 b->nleaves = -1; 54 b->minleaf = PETSC_MAX_INT; 55 b->maxleaf = PETSC_MIN_INT; 56 b->nranks = -1; 57 b->rankorder = PETSC_TRUE; 58 b->ingroup = MPI_GROUP_NULL; 59 b->outgroup = MPI_GROUP_NULL; 60 b->graphset = PETSC_FALSE; 61 62 *sf = b; 63 PetscFunctionReturn(0); 64 } 65 66 /*@ 67 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 68 69 Collective 70 71 Input Arguments: 72 . sf - star forest 73 74 Level: advanced 75 76 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy() 77 @*/ 78 PetscErrorCode PetscSFReset(PetscSF sf) 79 { 80 PetscErrorCode ierr; 81 82 PetscFunctionBegin; 83 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 84 if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);} 85 sf->nroots = -1; 86 sf->nleaves = -1; 87 sf->minleaf = PETSC_MAX_INT; 88 sf->maxleaf = PETSC_MIN_INT; 89 sf->mine = NULL; 90 sf->remote = NULL; 91 sf->graphset = PETSC_FALSE; 92 ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr); 93 ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr); 94 sf->nranks = -1; 95 ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr); 96 #if defined(PETSC_HAVE_CUDA) 97 { 98 PetscInt i; 99 for (i=0; i<2; i++) {if (sf->rmine_d[i]) {cudaError_t err = cudaFree(sf->rmine_d[i]);CHKERRCUDA(err);sf->rmine_d[i]=NULL;}} 100 } 101 #endif 102 sf->degreeknown = PETSC_FALSE; 103 ierr = PetscFree(sf->degree);CHKERRQ(ierr); 104 if (sf->ingroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);} 105 if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);} 106 ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr); 107 ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr); 108 sf->setupcalled = PETSC_FALSE; 109 PetscFunctionReturn(0); 110 } 111 112 /*@C 113 PetscSFSetType - Set the PetscSF communication implementation 114 115 Collective on PetscSF 116 117 Input Parameters: 118 + sf - the PetscSF context 119 - type - a known method 120 121 Options Database Key: 122 . -sf_type <type> - Sets the method; use -help for a list 123 of available methods (for instance, window, basic, neighbor) 124 125 Notes: 126 See "include/petscsf.h" for available methods (for instance) 127 + PETSCSFWINDOW - MPI-2/3 one-sided 128 - PETSCSFBASIC - basic implementation using MPI-1 two-sided 129 130 Level: intermediate 131 132 .seealso: PetscSFType, PetscSFCreate() 133 @*/ 134 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type) 135 { 136 PetscErrorCode ierr,(*r)(PetscSF); 137 PetscBool match; 138 139 PetscFunctionBegin; 140 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 141 PetscValidCharPointer(type,2); 142 143 ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr); 144 if (match) PetscFunctionReturn(0); 145 146 ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr); 147 if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type); 148 /* Destroy the previous PetscSF implementation context */ 149 if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);} 150 ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr); 151 ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr); 152 ierr = (*r)(sf);CHKERRQ(ierr); 153 PetscFunctionReturn(0); 154 } 155 156 /*@C 157 PetscSFGetType - Get the PetscSF communication implementation 158 159 Not Collective 160 161 Input Parameter: 162 . sf - the PetscSF context 163 164 Output Parameter: 165 . type - the PetscSF type name 166 167 Level: intermediate 168 169 .seealso: PetscSFSetType(), PetscSFCreate() 170 @*/ 171 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 172 { 173 PetscFunctionBegin; 174 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1); 175 PetscValidPointer(type,2); 176 *type = ((PetscObject)sf)->type_name; 177 PetscFunctionReturn(0); 178 } 179 180 /*@ 181 PetscSFDestroy - destroy star forest 182 183 Collective 184 185 Input Arguments: 186 . sf - address of star forest 187 188 Level: intermediate 189 190 .seealso: PetscSFCreate(), PetscSFReset() 191 @*/ 192 PetscErrorCode PetscSFDestroy(PetscSF *sf) 193 { 194 PetscErrorCode ierr; 195 196 PetscFunctionBegin; 197 if (!*sf) PetscFunctionReturn(0); 198 PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1); 199 if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);} 200 ierr = PetscSFReset(*sf);CHKERRQ(ierr); 201 if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);} 202 ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr); 203 PetscFunctionReturn(0); 204 } 205 206 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 207 { 208 PetscInt i, nleaves; 209 PetscMPIInt size; 210 const PetscInt *ilocal; 211 const PetscSFNode *iremote; 212 PetscErrorCode ierr; 213 214 PetscFunctionBegin; 215 if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0); 216 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 217 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr); 218 for (i = 0; i < nleaves; i++) { 219 const PetscInt rank = iremote[i].rank; 220 const PetscInt remote = iremote[i].index; 221 const PetscInt leaf = ilocal ? ilocal[i] : i; 222 if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size); 223 if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i); 224 if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i); 225 } 226 PetscFunctionReturn(0); 227 } 228 229 /*@ 230 PetscSFSetUp - set up communication structures 231 232 Collective 233 234 Input Arguments: 235 . sf - star forest communication object 236 237 Level: beginner 238 239 .seealso: PetscSFSetFromOptions(), PetscSFSetType() 240 @*/ 241 PetscErrorCode PetscSFSetUp(PetscSF sf) 242 { 243 PetscErrorCode ierr; 244 245 PetscFunctionBegin; 246 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 247 PetscSFCheckGraphSet(sf,1); 248 if (sf->setupcalled) PetscFunctionReturn(0); 249 ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr); 250 sf->use_gpu_aware_mpi = use_gpu_aware_mpi; 251 if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} 252 ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 253 if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);} 254 ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 255 sf->setupcalled = PETSC_TRUE; 256 PetscFunctionReturn(0); 257 } 258 259 /*@ 260 PetscSFSetFromOptions - set PetscSF options using the options database 261 262 Logically Collective 263 264 Input Arguments: 265 . sf - star forest 266 267 Options Database Keys: 268 + -sf_type - implementation type, see PetscSFSetType() 269 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 270 . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also 271 use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true). 272 If true, this option only works with -use_cuda_aware_mpi 1. 273 - -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false). 274 If true, this option only works with -use_cuda_aware_mpi 1. 275 276 Level: intermediate 277 @*/ 278 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 279 { 280 PetscSFType deft; 281 char type[256]; 282 PetscErrorCode ierr; 283 PetscBool flg; 284 285 PetscFunctionBegin; 286 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 287 ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr); 288 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 289 ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr); 290 ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr); 291 ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr); 292 293 #if defined(PETSC_HAVE_CUDA) 294 sf->use_default_stream = PETSC_TRUE; /* The assumption is true for PETSc internal use of SF */ 295 ierr = PetscOptionsBool("-sf_use_default_stream","Assume SF's input and output root/leafdata is computed on the default stream","PetscSFSetFromOptions",sf->use_default_stream,&sf->use_default_stream,NULL);CHKERRQ(ierr); 296 sf->use_stream_aware_mpi = PETSC_FALSE; 297 ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr); 298 #endif 299 if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);} 300 ierr = PetscOptionsEnd();CHKERRQ(ierr); 301 PetscFunctionReturn(0); 302 } 303 304 /*@ 305 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 306 307 Logically Collective 308 309 Input Arguments: 310 + sf - star forest 311 - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic) 312 313 Level: advanced 314 315 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin() 316 @*/ 317 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg) 318 { 319 PetscFunctionBegin; 320 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 321 PetscValidLogicalCollectiveBool(sf,flg,2); 322 if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 323 sf->rankorder = flg; 324 PetscFunctionReturn(0); 325 } 326 327 /*@ 328 PetscSFSetGraph - Set a parallel star forest 329 330 Collective 331 332 Input Arguments: 333 + sf - star forest 334 . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 335 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 336 . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced 337 during setup in debug mode) 338 . localmode - copy mode for ilocal 339 . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 340 during setup in debug mode) 341 - remotemode - copy mode for iremote 342 343 Level: intermediate 344 345 Notes: 346 In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode 347 348 Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they 349 encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not 350 needed 351 352 Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf 353 indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode). 354 355 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph() 356 @*/ 357 PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode) 358 { 359 PetscErrorCode ierr; 360 361 PetscFunctionBegin; 362 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 363 if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4); 364 if (nleaves > 0) PetscValidPointer(iremote,6); 365 if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots); 366 if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves); 367 368 if (sf->nroots >= 0) { /* Reset only if graph already set */ 369 ierr = PetscSFReset(sf);CHKERRQ(ierr); 370 } 371 372 ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 373 374 sf->nroots = nroots; 375 sf->nleaves = nleaves; 376 377 if (nleaves && ilocal) { 378 PetscInt i; 379 PetscInt minleaf = PETSC_MAX_INT; 380 PetscInt maxleaf = PETSC_MIN_INT; 381 int contiguous = 1; 382 for (i=0; i<nleaves; i++) { 383 minleaf = PetscMin(minleaf,ilocal[i]); 384 maxleaf = PetscMax(maxleaf,ilocal[i]); 385 contiguous &= (ilocal[i] == i); 386 } 387 sf->minleaf = minleaf; 388 sf->maxleaf = maxleaf; 389 if (contiguous) { 390 if (localmode == PETSC_OWN_POINTER) { 391 ierr = PetscFree(ilocal);CHKERRQ(ierr); 392 } 393 ilocal = NULL; 394 } 395 } else { 396 sf->minleaf = 0; 397 sf->maxleaf = nleaves - 1; 398 } 399 400 if (ilocal) { 401 switch (localmode) { 402 case PETSC_COPY_VALUES: 403 ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr); 404 ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr); 405 sf->mine = sf->mine_alloc; 406 break; 407 case PETSC_OWN_POINTER: 408 sf->mine_alloc = (PetscInt*)ilocal; 409 sf->mine = sf->mine_alloc; 410 break; 411 case PETSC_USE_POINTER: 412 sf->mine_alloc = NULL; 413 sf->mine = (PetscInt*)ilocal; 414 break; 415 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode"); 416 } 417 } 418 419 switch (remotemode) { 420 case PETSC_COPY_VALUES: 421 ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr); 422 ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr); 423 sf->remote = sf->remote_alloc; 424 break; 425 case PETSC_OWN_POINTER: 426 sf->remote_alloc = (PetscSFNode*)iremote; 427 sf->remote = sf->remote_alloc; 428 break; 429 case PETSC_USE_POINTER: 430 sf->remote_alloc = NULL; 431 sf->remote = (PetscSFNode*)iremote; 432 break; 433 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode"); 434 } 435 436 ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 437 sf->graphset = PETSC_TRUE; 438 PetscFunctionReturn(0); 439 } 440 441 /*@ 442 PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern 443 444 Collective 445 446 Input Parameters: 447 + sf - The PetscSF 448 . map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL) 449 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL 450 451 Notes: 452 It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map. 453 n and N are local and global sizes of x respectively. 454 455 With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to 456 sequential vectors y on all ranks. 457 458 With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a 459 sequential vector y on rank 0. 460 461 In above cases, entries of x are roots and entries of y are leaves. 462 463 With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine 464 creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 465 of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does 466 not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data 467 items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines. 468 469 In this case, roots and leaves are symmetric. 470 471 Level: intermediate 472 @*/ 473 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern) 474 { 475 MPI_Comm comm; 476 PetscInt n,N,res[2]; 477 PetscMPIInt rank,size; 478 PetscSFType type; 479 PetscErrorCode ierr; 480 481 PetscFunctionBegin; 482 ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr); 483 if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern); 484 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 485 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 486 487 if (pattern == PETSCSF_PATTERN_ALLTOALL) { 488 type = PETSCSFALLTOALL; 489 ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr); 490 ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr); 491 ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr); 492 ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr); 493 } else { 494 ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr); 495 ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr); 496 res[0] = n; 497 res[1] = -n; 498 /* Check if n are same over all ranks so that we can optimize it */ 499 ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 500 if (res[0] == -res[1]) { /* same n */ 501 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 502 } else { 503 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 504 } 505 ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr); 506 } 507 ierr = PetscSFSetType(sf,type);CHKERRQ(ierr); 508 509 sf->pattern = pattern; 510 sf->mine = NULL; /* Contiguous */ 511 512 /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 513 Also set other easy stuff. 514 */ 515 if (pattern == PETSCSF_PATTERN_ALLGATHER) { 516 sf->nleaves = N; 517 sf->nroots = n; 518 sf->nranks = size; 519 sf->minleaf = 0; 520 sf->maxleaf = N - 1; 521 } else if (pattern == PETSCSF_PATTERN_GATHER) { 522 sf->nleaves = rank ? 0 : N; 523 sf->nroots = n; 524 sf->nranks = rank ? 0 : size; 525 sf->minleaf = 0; 526 sf->maxleaf = rank ? -1 : N - 1; 527 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 528 sf->nleaves = size; 529 sf->nroots = size; 530 sf->nranks = size; 531 sf->minleaf = 0; 532 sf->maxleaf = size - 1; 533 } 534 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 535 sf->graphset = PETSC_TRUE; 536 PetscFunctionReturn(0); 537 } 538 539 /*@ 540 PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map 541 542 Collective 543 544 Input Arguments: 545 546 . sf - star forest to invert 547 548 Output Arguments: 549 . isf - inverse of sf 550 Level: advanced 551 552 Notes: 553 All roots must have degree 1. 554 555 The local space may be a permutation, but cannot be sparse. 556 557 .seealso: PetscSFSetGraph() 558 @*/ 559 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf) 560 { 561 PetscErrorCode ierr; 562 PetscMPIInt rank; 563 PetscInt i,nroots,nleaves,maxlocal,count,*newilocal; 564 const PetscInt *ilocal; 565 PetscSFNode *roots,*leaves; 566 567 PetscFunctionBegin; 568 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 569 PetscSFCheckGraphSet(sf,1); 570 PetscValidPointer(isf,2); 571 572 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 573 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 574 575 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 576 ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr); 577 for (i=0; i<maxlocal; i++) { 578 leaves[i].rank = rank; 579 leaves[i].index = i; 580 } 581 for (i=0; i <nroots; i++) { 582 roots[i].rank = -1; 583 roots[i].index = -1; 584 } 585 ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr); 586 ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr); 587 588 /* Check whether our leaves are sparse */ 589 for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++; 590 if (count == nroots) newilocal = NULL; 591 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ 592 ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr); 593 for (i=0,count=0; i<nroots; i++) { 594 if (roots[i].rank >= 0) { 595 newilocal[count] = i; 596 roots[count].rank = roots[i].rank; 597 roots[count].index = roots[i].index; 598 count++; 599 } 600 } 601 } 602 603 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr); 604 ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr); 605 ierr = PetscFree2(roots,leaves);CHKERRQ(ierr); 606 PetscFunctionReturn(0); 607 } 608 609 /*@ 610 PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph 611 612 Collective 613 614 Input Arguments: 615 + sf - communication object to duplicate 616 - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption) 617 618 Output Arguments: 619 . newsf - new communication object 620 621 Level: beginner 622 623 .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph() 624 @*/ 625 PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf) 626 { 627 PetscSFType type; 628 PetscErrorCode ierr; 629 630 PetscFunctionBegin; 631 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 632 PetscValidLogicalCollectiveEnum(sf,opt,2); 633 PetscValidPointer(newsf,3); 634 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr); 635 ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 636 if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);} 637 if (opt == PETSCSF_DUPLICATE_GRAPH) { 638 PetscSFCheckGraphSet(sf,1); 639 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 640 PetscInt nroots,nleaves; 641 const PetscInt *ilocal; 642 const PetscSFNode *iremote; 643 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 644 ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr); 645 } else { 646 ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr); 647 } 648 } 649 if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);} 650 PetscFunctionReturn(0); 651 } 652 653 /*@C 654 PetscSFGetGraph - Get the graph specifying a parallel star forest 655 656 Not Collective 657 658 Input Arguments: 659 . sf - star forest 660 661 Output Arguments: 662 + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 663 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 664 . ilocal - locations of leaves in leafdata buffers 665 - iremote - remote locations of root vertices for each leaf on the current process 666 667 Notes: 668 We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet 669 670 When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you 671 want to update the graph, you must call PetscSFSetGraph after modifying the iremote array. 672 673 Level: intermediate 674 675 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph() 676 @*/ 677 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 678 { 679 PetscErrorCode ierr; 680 681 PetscFunctionBegin; 682 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 683 if (sf->ops->GetGraph) { 684 ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr); 685 } else { 686 if (nroots) *nroots = sf->nroots; 687 if (nleaves) *nleaves = sf->nleaves; 688 if (ilocal) *ilocal = sf->mine; 689 if (iremote) *iremote = sf->remote; 690 } 691 PetscFunctionReturn(0); 692 } 693 694 /*@ 695 PetscSFGetLeafRange - Get the active leaf ranges 696 697 Not Collective 698 699 Input Arguments: 700 . sf - star forest 701 702 Output Arguments: 703 + minleaf - minimum active leaf on this process. Return 0 if there are no leaves. 704 - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves. 705 706 Level: developer 707 708 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph() 709 @*/ 710 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf) 711 { 712 713 PetscFunctionBegin; 714 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 715 PetscSFCheckGraphSet(sf,1); 716 if (minleaf) *minleaf = sf->minleaf; 717 if (maxleaf) *maxleaf = sf->maxleaf; 718 PetscFunctionReturn(0); 719 } 720 721 /*@C 722 PetscSFViewFromOptions - View from Options 723 724 Collective on PetscSF 725 726 Input Parameters: 727 + A - the star forest 728 . obj - Optional object 729 - name - command line option 730 731 Level: intermediate 732 .seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate() 733 @*/ 734 PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[]) 735 { 736 PetscErrorCode ierr; 737 738 PetscFunctionBegin; 739 PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1); 740 ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr); 741 PetscFunctionReturn(0); 742 } 743 744 /*@C 745 PetscSFView - view a star forest 746 747 Collective 748 749 Input Arguments: 750 + sf - star forest 751 - viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD 752 753 Level: beginner 754 755 .seealso: PetscSFCreate(), PetscSFSetGraph() 756 @*/ 757 PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer) 758 { 759 PetscErrorCode ierr; 760 PetscBool iascii; 761 PetscViewerFormat format; 762 763 PetscFunctionBegin; 764 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 765 if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);} 766 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 767 PetscCheckSameComm(sf,1,viewer,2); 768 if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);} 769 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 770 if (iascii) { 771 PetscMPIInt rank; 772 PetscInt ii,i,j; 773 774 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr); 775 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 776 if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);} 777 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 778 if (!sf->graphset) { 779 ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr); 780 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 781 PetscFunctionReturn(0); 782 } 783 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 784 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 785 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr); 786 for (i=0; i<sf->nleaves; i++) { 787 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr); 788 } 789 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 790 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 791 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 792 PetscMPIInt *tmpranks,*perm; 793 ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr); 794 ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr); 795 for (i=0; i<sf->nranks; i++) perm[i] = i; 796 ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr); 797 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr); 798 for (ii=0; ii<sf->nranks; ii++) { 799 i = perm[ii]; 800 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr); 801 for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) { 802 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr); 803 } 804 } 805 ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr); 806 } 807 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 808 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 809 } 810 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 811 } 812 PetscFunctionReturn(0); 813 } 814 815 /*@C 816 PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process 817 818 Not Collective 819 820 Input Arguments: 821 . sf - star forest 822 823 Output Arguments: 824 + nranks - number of ranks referenced by local part 825 . ranks - array of ranks 826 . roffset - offset in rmine/rremote for each rank (length nranks+1) 827 . rmine - concatenated array holding local indices referencing each remote rank 828 - rremote - concatenated array holding remote indices referenced for each remote rank 829 830 Level: developer 831 832 .seealso: PetscSFGetLeafRanks() 833 @*/ 834 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote) 835 { 836 PetscErrorCode ierr; 837 838 PetscFunctionBegin; 839 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 840 if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 841 if (sf->ops->GetRootRanks) { 842 ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr); 843 } else { 844 /* The generic implementation */ 845 if (nranks) *nranks = sf->nranks; 846 if (ranks) *ranks = sf->ranks; 847 if (roffset) *roffset = sf->roffset; 848 if (rmine) *rmine = sf->rmine; 849 if (rremote) *rremote = sf->rremote; 850 } 851 PetscFunctionReturn(0); 852 } 853 854 /*@C 855 PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 856 857 Not Collective 858 859 Input Arguments: 860 . sf - star forest 861 862 Output Arguments: 863 + niranks - number of leaf ranks referencing roots on this process 864 . iranks - array of ranks 865 . ioffset - offset in irootloc for each rank (length niranks+1) 866 - irootloc - concatenated array holding local indices of roots referenced by each leaf rank 867 868 Level: developer 869 870 .seealso: PetscSFGetRootRanks() 871 @*/ 872 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 873 { 874 PetscErrorCode ierr; 875 876 PetscFunctionBegin; 877 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 878 if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 879 if (sf->ops->GetLeafRanks) { 880 ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr); 881 } else { 882 PetscSFType type; 883 ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 884 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 885 } 886 PetscFunctionReturn(0); 887 } 888 889 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) { 890 PetscInt i; 891 for (i=0; i<n; i++) { 892 if (needle == list[i]) return PETSC_TRUE; 893 } 894 return PETSC_FALSE; 895 } 896 897 /*@C 898 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations. 899 900 Collective 901 902 Input Arguments: 903 + sf - PetscSF to set up; PetscSFSetGraph() must have been called 904 - dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange) 905 906 Level: developer 907 908 .seealso: PetscSFGetRootRanks() 909 @*/ 910 PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup) 911 { 912 PetscErrorCode ierr; 913 PetscTable table; 914 PetscTablePosition pos; 915 PetscMPIInt size,groupsize,*groupranks; 916 PetscInt *rcount,*ranks; 917 PetscInt i, irank = -1,orank = -1; 918 919 PetscFunctionBegin; 920 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 921 PetscSFCheckGraphSet(sf,1); 922 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr); 923 ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr); 924 for (i=0; i<sf->nleaves; i++) { 925 /* Log 1-based rank */ 926 ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr); 927 } 928 ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr); 929 ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr); 930 ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr); 931 ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr); 932 for (i=0; i<sf->nranks; i++) { 933 ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr); 934 ranks[i]--; /* Convert back to 0-based */ 935 } 936 ierr = PetscTableDestroy(&table);CHKERRQ(ierr); 937 938 /* We expect that dgroup is reliably "small" while nranks could be large */ 939 { 940 MPI_Group group = MPI_GROUP_NULL; 941 PetscMPIInt *dgroupranks; 942 ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr); 943 ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr); 944 ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr); 945 ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr); 946 for (i=0; i<groupsize; i++) dgroupranks[i] = i; 947 if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);} 948 ierr = MPI_Group_free(&group);CHKERRQ(ierr); 949 ierr = PetscFree(dgroupranks);CHKERRQ(ierr); 950 } 951 952 /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */ 953 for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i; ) { 954 for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */ 955 if (InList(ranks[i],groupsize,groupranks)) break; 956 } 957 for ( ; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */ 958 if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break; 959 } 960 if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */ 961 PetscInt tmprank,tmpcount; 962 963 tmprank = ranks[i]; 964 tmpcount = rcount[i]; 965 ranks[i] = ranks[sf->ndranks]; 966 rcount[i] = rcount[sf->ndranks]; 967 ranks[sf->ndranks] = tmprank; 968 rcount[sf->ndranks] = tmpcount; 969 sf->ndranks++; 970 } 971 } 972 ierr = PetscFree(groupranks);CHKERRQ(ierr); 973 ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr); 974 ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr); 975 sf->roffset[0] = 0; 976 for (i=0; i<sf->nranks; i++) { 977 ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr); 978 sf->roffset[i+1] = sf->roffset[i] + rcount[i]; 979 rcount[i] = 0; 980 } 981 for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) { 982 /* short circuit */ 983 if (orank != sf->remote[i].rank) { 984 /* Search for index of iremote[i].rank in sf->ranks */ 985 ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr); 986 if (irank < 0) { 987 ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr); 988 if (irank >= 0) irank += sf->ndranks; 989 } 990 orank = sf->remote[i].rank; 991 } 992 if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank); 993 sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i; 994 sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index; 995 rcount[irank]++; 996 } 997 ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr); 998 PetscFunctionReturn(0); 999 } 1000 1001 /*@C 1002 PetscSFGetGroups - gets incoming and outgoing process groups 1003 1004 Collective 1005 1006 Input Argument: 1007 . sf - star forest 1008 1009 Output Arguments: 1010 + incoming - group of origin processes for incoming edges (leaves that reference my roots) 1011 - outgoing - group of destination processes for outgoing edges (roots that I reference) 1012 1013 Level: developer 1014 1015 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow() 1016 @*/ 1017 PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing) 1018 { 1019 PetscErrorCode ierr; 1020 MPI_Group group = MPI_GROUP_NULL; 1021 1022 PetscFunctionBegin; 1023 if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups"); 1024 if (sf->ingroup == MPI_GROUP_NULL) { 1025 PetscInt i; 1026 const PetscInt *indegree; 1027 PetscMPIInt rank,*outranks,*inranks; 1028 PetscSFNode *remote; 1029 PetscSF bgcount; 1030 1031 /* Compute the number of incoming ranks */ 1032 ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr); 1033 for (i=0; i<sf->nranks; i++) { 1034 remote[i].rank = sf->ranks[i]; 1035 remote[i].index = 0; 1036 } 1037 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr); 1038 ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1039 ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr); 1040 ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr); 1041 /* Enumerate the incoming ranks */ 1042 ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr); 1043 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 1044 for (i=0; i<sf->nranks; i++) outranks[i] = rank; 1045 ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr); 1046 ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr); 1047 ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr); 1048 ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr); 1049 ierr = MPI_Group_free(&group);CHKERRQ(ierr); 1050 ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr); 1051 ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr); 1052 } 1053 *incoming = sf->ingroup; 1054 1055 if (sf->outgroup == MPI_GROUP_NULL) { 1056 ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr); 1057 ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr); 1058 ierr = MPI_Group_free(&group);CHKERRQ(ierr); 1059 } 1060 *outgoing = sf->outgroup; 1061 PetscFunctionReturn(0); 1062 } 1063 1064 /*@ 1065 PetscSFGetMultiSF - gets the inner SF implemeting gathers and scatters 1066 1067 Collective 1068 1069 Input Argument: 1070 . sf - star forest that may contain roots with 0 or with more than 1 vertex 1071 1072 Output Arguments: 1073 . multi - star forest with split roots, such that each root has degree exactly 1 1074 1075 Level: developer 1076 1077 Notes: 1078 1079 In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi 1080 directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming 1081 edge, it is a candidate for future optimization that might involve its removal. 1082 1083 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering() 1084 @*/ 1085 PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi) 1086 { 1087 PetscErrorCode ierr; 1088 1089 PetscFunctionBegin; 1090 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1091 PetscValidPointer(multi,2); 1092 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */ 1093 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1094 *multi = sf->multi; 1095 PetscFunctionReturn(0); 1096 } 1097 if (!sf->multi) { 1098 const PetscInt *indegree; 1099 PetscInt i,*inoffset,*outones,*outoffset,maxlocal; 1100 PetscSFNode *remote; 1101 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 1102 ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr); 1103 ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr); 1104 ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr); 1105 inoffset[0] = 0; 1106 for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i]; 1107 for (i=0; i<maxlocal; i++) outones[i] = 1; 1108 ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1109 ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1110 for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1111 if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */ 1112 for (i=0; i<sf->nroots; i++) { 1113 if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp"); 1114 } 1115 } 1116 ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr); 1117 for (i=0; i<sf->nleaves; i++) { 1118 remote[i].rank = sf->remote[i].rank; 1119 remote[i].index = outoffset[sf->mine ? sf->mine[i] : i]; 1120 } 1121 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1122 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1123 if (sf->rankorder) { /* Sort the ranks */ 1124 PetscMPIInt rank; 1125 PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree; 1126 PetscSFNode *newremote; 1127 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 1128 for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]); 1129 ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr); 1130 for (i=0; i<maxlocal; i++) outranks[i] = rank; 1131 ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr); 1132 ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr); 1133 /* Sort the incoming ranks at each vertex, build the inverse map */ 1134 for (i=0; i<sf->nroots; i++) { 1135 PetscInt j; 1136 for (j=0; j<indegree[i]; j++) tmpoffset[j] = j; 1137 ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr); 1138 for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1139 } 1140 ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr); 1141 ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr); 1142 ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr); 1143 for (i=0; i<sf->nleaves; i++) { 1144 newremote[i].rank = sf->remote[i].rank; 1145 newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i]; 1146 } 1147 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1148 ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr); 1149 } 1150 ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr); 1151 } 1152 *multi = sf->multi; 1153 PetscFunctionReturn(0); 1154 } 1155 1156 /*@C 1157 PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices 1158 1159 Collective 1160 1161 Input Arguments: 1162 + sf - original star forest 1163 . nselected - number of selected roots on this process 1164 - selected - indices of the selected roots on this process 1165 1166 Output Arguments: 1167 . esf - new star forest 1168 1169 Level: advanced 1170 1171 Note: 1172 To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can 1173 be done by calling PetscSFGetGraph(). 1174 1175 .seealso: PetscSFSetGraph(), PetscSFGetGraph() 1176 @*/ 1177 PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf) 1178 { 1179 PetscInt i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal; 1180 const PetscInt *ilocal; 1181 signed char *rootdata,*leafdata,*leafmem; 1182 const PetscSFNode *iremote; 1183 PetscSFNode *new_iremote; 1184 MPI_Comm comm; 1185 PetscErrorCode ierr; 1186 1187 PetscFunctionBegin; 1188 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1189 PetscSFCheckGraphSet(sf,1); 1190 if (nselected) PetscValidPointer(selected,3); 1191 PetscValidPointer(esf,4); 1192 1193 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1194 1195 ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1196 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1197 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 1198 1199 if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */ 1200 1201 PetscBool dups; 1202 ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr); 1203 if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups"); 1204 for (i=0; i<nselected; i++) 1205 if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %D is out of [0,%D)",selected[i],nroots); 1206 } 1207 1208 if (sf->ops->CreateEmbeddedSF) { 1209 ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,selected,esf);CHKERRQ(ierr); 1210 } else { 1211 /* A generic version of creating embedded sf */ 1212 ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 1213 maxlocal = maxleaf - minleaf + 1; 1214 ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr); 1215 leafdata = leafmem - minleaf; 1216 /* Tag selected roots and bcast to leaves */ 1217 for (i=0; i<nselected; i++) rootdata[selected[i]] = 1; 1218 ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr); 1219 ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr); 1220 1221 /* Build esf with leaves that are still connected */ 1222 esf_nleaves = 0; 1223 for (i=0; i<nleaves; i++) { 1224 j = ilocal ? ilocal[i] : i; 1225 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1226 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1227 */ 1228 esf_nleaves += (leafdata[j] ? 1 : 0); 1229 } 1230 ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr); 1231 ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr); 1232 for (i=n=0; i<nleaves; i++) { 1233 j = ilocal ? ilocal[i] : i; 1234 if (leafdata[j]) { 1235 new_ilocal[n] = j; 1236 new_iremote[n].rank = iremote[i].rank; 1237 new_iremote[n].index = iremote[i].index; 1238 ++n; 1239 } 1240 } 1241 ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr); 1242 ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr); 1243 ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1244 ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr); 1245 } 1246 ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1247 PetscFunctionReturn(0); 1248 } 1249 1250 /*@C 1251 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1252 1253 Collective 1254 1255 Input Arguments: 1256 + sf - original star forest 1257 . nselected - number of selected leaves on this process 1258 - selected - indices of the selected leaves on this process 1259 1260 Output Arguments: 1261 . newsf - new star forest 1262 1263 Level: advanced 1264 1265 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph() 1266 @*/ 1267 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1268 { 1269 const PetscSFNode *iremote; 1270 PetscSFNode *new_iremote; 1271 const PetscInt *ilocal; 1272 PetscInt i,nroots,*leaves,*new_ilocal; 1273 MPI_Comm comm; 1274 PetscErrorCode ierr; 1275 1276 PetscFunctionBegin; 1277 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1278 PetscSFCheckGraphSet(sf,1); 1279 if (nselected) PetscValidPointer(selected,3); 1280 PetscValidPointer(newsf,4); 1281 1282 /* Uniq selected[] and put results in leaves[] */ 1283 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1284 ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr); 1285 ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr); 1286 ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr); 1287 if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves); 1288 1289 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1290 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) { 1291 ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr); 1292 } else { 1293 ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr); 1294 ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr); 1295 ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr); 1296 for (i=0; i<nselected; ++i) { 1297 const PetscInt l = leaves[i]; 1298 new_ilocal[i] = ilocal ? ilocal[l] : l; 1299 new_iremote[i].rank = iremote[l].rank; 1300 new_iremote[i].index = iremote[l].index; 1301 } 1302 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr); 1303 ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1304 } 1305 ierr = PetscFree(leaves);CHKERRQ(ierr); 1306 PetscFunctionReturn(0); 1307 } 1308 1309 /*@C 1310 PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd() 1311 1312 Collective on PetscSF 1313 1314 Input Arguments: 1315 + sf - star forest on which to communicate 1316 . unit - data type associated with each node 1317 . rootdata - buffer to broadcast 1318 - op - operation to use for reduction 1319 1320 Output Arguments: 1321 . leafdata - buffer to be reduced with values from each leaf's respective root 1322 1323 Level: intermediate 1324 1325 .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd() 1326 @*/ 1327 PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1328 { 1329 PetscErrorCode ierr; 1330 PetscMemType rootmtype,leafmtype; 1331 1332 PetscFunctionBegin; 1333 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1334 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1335 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1336 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1337 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1338 ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 1339 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1340 PetscFunctionReturn(0); 1341 } 1342 1343 /*@C 1344 PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin() 1345 1346 Collective 1347 1348 Input Arguments: 1349 + sf - star forest 1350 . unit - data type 1351 . rootdata - buffer to broadcast 1352 - op - operation to use for reduction 1353 1354 Output Arguments: 1355 . leafdata - buffer to be reduced with values from each leaf's respective root 1356 1357 Level: intermediate 1358 1359 .seealso: PetscSFSetGraph(), PetscSFReduceEnd() 1360 @*/ 1361 PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1362 { 1363 PetscErrorCode ierr; 1364 1365 PetscFunctionBegin; 1366 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1367 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1368 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1369 ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr); 1370 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1371 PetscFunctionReturn(0); 1372 } 1373 1374 /*@C 1375 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 1376 1377 Collective 1378 1379 Input Arguments: 1380 + sf - star forest 1381 . unit - data type 1382 . leafdata - values to reduce 1383 - op - reduction operation 1384 1385 Output Arguments: 1386 . rootdata - result of reduction of values from all leaves of each root 1387 1388 Level: intermediate 1389 1390 .seealso: PetscSFBcastBegin() 1391 @*/ 1392 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1393 { 1394 PetscErrorCode ierr; 1395 PetscMemType rootmtype,leafmtype; 1396 1397 PetscFunctionBegin; 1398 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1399 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1400 ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1401 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1402 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1403 ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr); 1404 ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1405 PetscFunctionReturn(0); 1406 } 1407 1408 /*@C 1409 PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin() 1410 1411 Collective 1412 1413 Input Arguments: 1414 + sf - star forest 1415 . unit - data type 1416 . leafdata - values to reduce 1417 - op - reduction operation 1418 1419 Output Arguments: 1420 . rootdata - result of reduction of values from all leaves of each root 1421 1422 Level: intermediate 1423 1424 .seealso: PetscSFSetGraph(), PetscSFBcastEnd() 1425 @*/ 1426 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1427 { 1428 PetscErrorCode ierr; 1429 1430 PetscFunctionBegin; 1431 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1432 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1433 ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr); 1434 ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr); 1435 ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr); 1436 PetscFunctionReturn(0); 1437 } 1438 1439 /*@C 1440 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd() 1441 1442 Collective 1443 1444 Input Arguments: 1445 + sf - star forest 1446 . unit - data type 1447 . leafdata - leaf values to use in reduction 1448 - op - operation to use for reduction 1449 1450 Output Arguments: 1451 + rootdata - root values to be updated, input state is seen by first process to perform an update 1452 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1453 1454 Level: advanced 1455 1456 Note: 1457 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1458 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1459 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1460 integers. 1461 1462 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph() 1463 @*/ 1464 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1465 { 1466 PetscErrorCode ierr; 1467 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1468 1469 PetscFunctionBegin; 1470 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1471 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1472 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1473 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1474 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1475 ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr); 1476 if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1477 ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr); 1478 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1479 PetscFunctionReturn(0); 1480 } 1481 1482 /*@C 1483 PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value 1484 1485 Collective 1486 1487 Input Arguments: 1488 + sf - star forest 1489 . unit - data type 1490 . leafdata - leaf values to use in reduction 1491 - op - operation to use for reduction 1492 1493 Output Arguments: 1494 + rootdata - root values to be updated, input state is seen by first process to perform an update 1495 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1496 1497 Level: advanced 1498 1499 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph() 1500 @*/ 1501 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1502 { 1503 PetscErrorCode ierr; 1504 1505 PetscFunctionBegin; 1506 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1507 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1508 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1509 ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr); 1510 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1511 PetscFunctionReturn(0); 1512 } 1513 1514 /*@C 1515 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd() 1516 1517 Collective 1518 1519 Input Arguments: 1520 . sf - star forest 1521 1522 Output Arguments: 1523 . degree - degree of each root vertex 1524 1525 Level: advanced 1526 1527 Notes: 1528 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 1529 1530 .seealso: PetscSFGatherBegin() 1531 @*/ 1532 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree) 1533 { 1534 PetscErrorCode ierr; 1535 1536 PetscFunctionBegin; 1537 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1538 PetscSFCheckGraphSet(sf,1); 1539 PetscValidPointer(degree,2); 1540 if (!sf->degreeknown) { 1541 PetscInt i, nroots = sf->nroots, maxlocal; 1542 if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1543 maxlocal = sf->maxleaf-sf->minleaf+1; 1544 ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr); 1545 ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1546 for (i=0; i<nroots; i++) sf->degree[i] = 0; 1547 for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1; 1548 ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr); 1549 } 1550 *degree = NULL; 1551 PetscFunctionReturn(0); 1552 } 1553 1554 /*@C 1555 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin() 1556 1557 Collective 1558 1559 Input Arguments: 1560 . sf - star forest 1561 1562 Output Arguments: 1563 . degree - degree of each root vertex 1564 1565 Level: developer 1566 1567 Notes: 1568 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 1569 1570 .seealso: 1571 @*/ 1572 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree) 1573 { 1574 PetscErrorCode ierr; 1575 1576 PetscFunctionBegin; 1577 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1578 PetscSFCheckGraphSet(sf,1); 1579 PetscValidPointer(degree,2); 1580 if (!sf->degreeknown) { 1581 if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 1582 ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr); 1583 ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr); 1584 sf->degreeknown = PETSC_TRUE; 1585 } 1586 *degree = sf->degree; 1587 PetscFunctionReturn(0); 1588 } 1589 1590 1591 /*@C 1592 PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()). 1593 Each multi-root is assigned index of the corresponding original root. 1594 1595 Collective 1596 1597 Input Arguments: 1598 + sf - star forest 1599 - degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd() 1600 1601 Output Arguments: 1602 + nMultiRoots - (optional) number of multi-roots (roots of multi-SF) 1603 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots 1604 1605 Level: developer 1606 1607 Notes: 1608 The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed. 1609 1610 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF() 1611 @*/ 1612 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1613 { 1614 PetscSF msf; 1615 PetscInt i, j, k, nroots, nmroots; 1616 PetscErrorCode ierr; 1617 1618 PetscFunctionBegin; 1619 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1620 ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 1621 if (nroots) PetscValidIntPointer(degree,2); 1622 if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3); 1623 PetscValidPointer(multiRootsOrigNumbering,4); 1624 ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr); 1625 ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr); 1626 ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr); 1627 for (i=0,j=0,k=0; i<nroots; i++) { 1628 if (!degree[i]) continue; 1629 for (j=0; j<degree[i]; j++,k++) { 1630 (*multiRootsOrigNumbering)[k] = i; 1631 } 1632 } 1633 if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail"); 1634 if (nMultiRoots) *nMultiRoots = nmroots; 1635 PetscFunctionReturn(0); 1636 } 1637 1638 /*@C 1639 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 1640 1641 Collective 1642 1643 Input Arguments: 1644 + sf - star forest 1645 . unit - data type 1646 - leafdata - leaf data to gather to roots 1647 1648 Output Argument: 1649 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1650 1651 Level: intermediate 1652 1653 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1654 @*/ 1655 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1656 { 1657 PetscErrorCode ierr; 1658 PetscSF multi = NULL; 1659 1660 PetscFunctionBegin; 1661 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1662 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1663 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1664 ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 1665 PetscFunctionReturn(0); 1666 } 1667 1668 /*@C 1669 PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin() 1670 1671 Collective 1672 1673 Input Arguments: 1674 + sf - star forest 1675 . unit - data type 1676 - leafdata - leaf data to gather to roots 1677 1678 Output Argument: 1679 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1680 1681 Level: intermediate 1682 1683 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1684 @*/ 1685 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1686 { 1687 PetscErrorCode ierr; 1688 PetscSF multi = NULL; 1689 1690 PetscFunctionBegin; 1691 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1692 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1693 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1694 ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 1695 PetscFunctionReturn(0); 1696 } 1697 1698 /*@C 1699 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd() 1700 1701 Collective 1702 1703 Input Arguments: 1704 + sf - star forest 1705 . unit - data type 1706 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1707 1708 Output Argument: 1709 . leafdata - leaf data to be update with personal data from each respective root 1710 1711 Level: intermediate 1712 1713 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1714 @*/ 1715 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1716 { 1717 PetscErrorCode ierr; 1718 PetscSF multi = NULL; 1719 1720 PetscFunctionBegin; 1721 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1722 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1723 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1724 ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr); 1725 PetscFunctionReturn(0); 1726 } 1727 1728 /*@C 1729 PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin() 1730 1731 Collective 1732 1733 Input Arguments: 1734 + sf - star forest 1735 . unit - data type 1736 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1737 1738 Output Argument: 1739 . leafdata - leaf data to be update with personal data from each respective root 1740 1741 Level: intermediate 1742 1743 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1744 @*/ 1745 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1746 { 1747 PetscErrorCode ierr; 1748 PetscSF multi = NULL; 1749 1750 PetscFunctionBegin; 1751 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1752 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1753 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1754 ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr); 1755 PetscFunctionReturn(0); 1756 } 1757 1758 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 1759 { 1760 PetscInt i, n, nleaves; 1761 const PetscInt *ilocal = NULL; 1762 PetscHSetI seen; 1763 PetscErrorCode ierr; 1764 1765 PetscFunctionBegin; 1766 if (!PetscDefined(USE_DEBUG)) PetscFunctionReturn(0); 1767 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 1768 ierr = PetscHSetICreate(&seen);CHKERRQ(ierr); 1769 for (i = 0; i < nleaves; i++) { 1770 const PetscInt leaf = ilocal ? ilocal[i] : i; 1771 ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr); 1772 } 1773 ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr); 1774 if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique"); 1775 ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr); 1776 PetscFunctionReturn(0); 1777 } 1778 1779 /*@ 1780 PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view 1781 1782 Input Parameters: 1783 + sfA - The first PetscSF 1784 - sfB - The second PetscSF 1785 1786 Output Parameters: 1787 . sfBA - The composite SF 1788 1789 Level: developer 1790 1791 Notes: 1792 Currently, the two SFs must be defined on congruent communicators and they must be true star 1793 forests, i.e. the same leaf is not connected with different roots. 1794 1795 sfA's leaf space and sfB's root space might be partially overlapped. The composition builds 1796 a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected 1797 nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a 1798 Bcast on sfA, then a Bcast on sfB, on connected nodes. 1799 1800 .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph() 1801 @*/ 1802 PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA) 1803 { 1804 PetscErrorCode ierr; 1805 const PetscSFNode *remotePointsA,*remotePointsB; 1806 PetscSFNode *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB; 1807 const PetscInt *localPointsA,*localPointsB; 1808 PetscInt *localPointsBA; 1809 PetscInt i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA; 1810 PetscBool denseB; 1811 1812 PetscFunctionBegin; 1813 PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1); 1814 PetscSFCheckGraphSet(sfA,1); 1815 PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2); 1816 PetscSFCheckGraphSet(sfB,2); 1817 PetscCheckSameComm(sfA,1,sfB,2); 1818 PetscValidPointer(sfBA,3); 1819 ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr); 1820 ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr); 1821 1822 ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr); 1823 ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr); 1824 if (localPointsA) { 1825 ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr); 1826 for (i=0; i<numRootsB; i++) { 1827 reorderedRemotePointsA[i].rank = -1; 1828 reorderedRemotePointsA[i].index = -1; 1829 } 1830 for (i=0; i<numLeavesA; i++) { 1831 if (localPointsA[i] >= numRootsB) continue; 1832 reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i]; 1833 } 1834 remotePointsA = reorderedRemotePointsA; 1835 } 1836 ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr); 1837 ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr); 1838 ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr); 1839 ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr); 1840 ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr); 1841 1842 denseB = (PetscBool)!localPointsB; 1843 for (i=0,numLeavesBA=0; i<numLeavesB; i++) { 1844 if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE; 1845 else numLeavesBA++; 1846 } 1847 if (denseB) { 1848 localPointsBA = NULL; 1849 remotePointsBA = leafdataB; 1850 } else { 1851 ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr); 1852 ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr); 1853 for (i=0,numLeavesBA=0; i<numLeavesB; i++) { 1854 const PetscInt l = localPointsB ? localPointsB[i] : i; 1855 1856 if (leafdataB[l-minleaf].rank == -1) continue; 1857 remotePointsBA[numLeavesBA] = leafdataB[l-minleaf]; 1858 localPointsBA[numLeavesBA] = l; 1859 numLeavesBA++; 1860 } 1861 ierr = PetscFree(leafdataB);CHKERRQ(ierr); 1862 } 1863 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr); 1864 ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr); 1865 PetscFunctionReturn(0); 1866 } 1867 1868 /*@ 1869 PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one 1870 1871 Input Parameters: 1872 + sfA - The first PetscSF 1873 - sfB - The second PetscSF 1874 1875 Output Parameters: 1876 . sfBA - The composite SF. 1877 1878 Level: developer 1879 1880 Notes: 1881 Currently, the two SFs must be defined on congruent communicators and they must be true star 1882 forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the 1883 second SF must have a degree of 1, i.e., no roots have more than one leaf connected. 1884 1885 sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds 1886 a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected 1887 roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then 1888 a Reduce on sfB, on connected roots. 1889 1890 .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF() 1891 @*/ 1892 PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA) 1893 { 1894 PetscErrorCode ierr; 1895 const PetscSFNode *remotePointsA,*remotePointsB; 1896 PetscSFNode *remotePointsBA; 1897 const PetscInt *localPointsA,*localPointsB; 1898 PetscSFNode *reorderedRemotePointsA = NULL; 1899 PetscInt i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA; 1900 MPI_Op op; 1901 #if defined(PETSC_USE_64BIT_INDICES) 1902 PetscBool iswin; 1903 #endif 1904 1905 PetscFunctionBegin; 1906 PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1); 1907 PetscSFCheckGraphSet(sfA,1); 1908 PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2); 1909 PetscSFCheckGraphSet(sfB,2); 1910 PetscCheckSameComm(sfA,1,sfB,2); 1911 PetscValidPointer(sfBA,3); 1912 ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr); 1913 ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr); 1914 1915 ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr); 1916 ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr); 1917 1918 /* TODO: Check roots of sfB have degree of 1 */ 1919 /* Once we implement it, we can replace the MPI_MAXLOC 1920 with MPIU_REPLACE. In that case, MPI_MAXLOC and MPIU_REPLACE have the same effect. 1921 We use MPI_MAXLOC only to have a deterministic output from this routine if 1922 the root condition is not meet. 1923 */ 1924 op = MPI_MAXLOC; 1925 #if defined(PETSC_USE_64BIT_INDICES) 1926 /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */ 1927 ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr); 1928 if (iswin) op = MPIU_REPLACE; 1929 #endif 1930 1931 ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr); 1932 ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr); 1933 for (i=0; i<maxleaf - minleaf + 1; i++) { 1934 reorderedRemotePointsA[i].rank = -1; 1935 reorderedRemotePointsA[i].index = -1; 1936 } 1937 if (localPointsA) { 1938 for (i=0; i<numLeavesA; i++) { 1939 if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; 1940 reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i]; 1941 } 1942 } else { 1943 for (i=0; i<numLeavesA; i++) { 1944 if (i > maxleaf || i < minleaf) continue; 1945 reorderedRemotePointsA[i - minleaf] = remotePointsA[i]; 1946 } 1947 } 1948 1949 ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr); 1950 ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr); 1951 for (i=0; i<numRootsB; i++) { 1952 remotePointsBA[i].rank = -1; 1953 remotePointsBA[i].index = -1; 1954 } 1955 1956 ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr); 1957 ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr); 1958 ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr); 1959 for (i=0,numLeavesBA=0; i<numRootsB; i++) { 1960 if (remotePointsBA[i].rank == -1) continue; 1961 remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank; 1962 remotePointsBA[numLeavesBA].index = remotePointsBA[i].index; 1963 localPointsBA[numLeavesBA] = i; 1964 numLeavesBA++; 1965 } 1966 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr); 1967 ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr); 1968 PetscFunctionReturn(0); 1969 } 1970 1971 /* 1972 PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF 1973 1974 Input Parameters: 1975 . sf - The global PetscSF 1976 1977 Output Parameters: 1978 . out - The local PetscSF 1979 */ 1980 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out) 1981 { 1982 MPI_Comm comm; 1983 PetscMPIInt myrank; 1984 const PetscInt *ilocal; 1985 const PetscSFNode *iremote; 1986 PetscInt i,j,nroots,nleaves,lnleaves,*lilocal; 1987 PetscSFNode *liremote; 1988 PetscSF lsf; 1989 PetscErrorCode ierr; 1990 1991 PetscFunctionBegin; 1992 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1993 if (sf->ops->CreateLocalSF) { 1994 ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr); 1995 } else { 1996 /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */ 1997 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1998 ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr); 1999 2000 /* Find out local edges and build a local SF */ 2001 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 2002 for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;} 2003 ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr); 2004 ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr); 2005 2006 for (i=j=0; i<nleaves; i++) { 2007 if (iremote[i].rank == (PetscInt)myrank) { 2008 lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */ 2009 liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */ 2010 liremote[j].index = iremote[i].index; 2011 j++; 2012 } 2013 } 2014 ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr); 2015 ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 2016 ierr = PetscSFSetUp(lsf);CHKERRQ(ierr); 2017 *out = lsf; 2018 } 2019 PetscFunctionReturn(0); 2020 } 2021 2022 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */ 2023 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata) 2024 { 2025 PetscErrorCode ierr; 2026 PetscMemType rootmtype,leafmtype; 2027 2028 PetscFunctionBegin; 2029 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 2030 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 2031 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 2032 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 2033 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 2034 if (sf->ops->BcastToZero) { 2035 ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr); 2036 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type"); 2037 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 2038 PetscFunctionReturn(0); 2039 } 2040 2041