1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/hashseti.h> 3 #include <petscctable.h> 4 5 #if defined(PETSC_USE_DEBUG) 6 # define PetscSFCheckGraphSet(sf,arg) do { \ 7 if (PetscUnlikely(!(sf)->graphset)) \ 8 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \ 9 } while (0) 10 #else 11 # define PetscSFCheckGraphSet(sf,arg) do {} while (0) 12 #endif 13 14 const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0}; 15 16 /*@ 17 PetscSFCreate - create a star forest communication context 18 19 Collective 20 21 Input Arguments: 22 . comm - communicator on which the star forest will operate 23 24 Output Arguments: 25 . sf - new star forest context 26 27 Options Database Keys: 28 + -sf_type basic -Use MPI persistent Isend/Irecv for communication (Default) 29 . -sf_type window -Use MPI-3 one-sided window for communication 30 - -sf_type neighbor -Use MPI-3 neighborhood collectives for communication 31 32 Level: intermediate 33 34 Notes: 35 When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv, 36 MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special 37 SFs are optimized and they have better performance than general SFs. 
38 39 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy() 40 @*/ 41 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf) 42 { 43 PetscErrorCode ierr; 44 PetscSF b; 45 46 PetscFunctionBegin; 47 PetscValidPointer(sf,2); 48 ierr = PetscSFInitializePackage();CHKERRQ(ierr); 49 50 ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr); 51 52 b->nroots = -1; 53 b->nleaves = -1; 54 b->minleaf = PETSC_MAX_INT; 55 b->maxleaf = PETSC_MIN_INT; 56 b->nranks = -1; 57 b->rankorder = PETSC_TRUE; 58 b->ingroup = MPI_GROUP_NULL; 59 b->outgroup = MPI_GROUP_NULL; 60 b->graphset = PETSC_FALSE; 61 62 *sf = b; 63 PetscFunctionReturn(0); 64 } 65 66 /*@ 67 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 68 69 Collective 70 71 Input Arguments: 72 . sf - star forest 73 74 Level: advanced 75 76 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy() 77 @*/ 78 PetscErrorCode PetscSFReset(PetscSF sf) 79 { 80 PetscErrorCode ierr; 81 82 PetscFunctionBegin; 83 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 84 if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);} 85 sf->nroots = -1; 86 sf->nleaves = -1; 87 sf->minleaf = PETSC_MAX_INT; 88 sf->maxleaf = PETSC_MIN_INT; 89 sf->mine = NULL; 90 sf->remote = NULL; 91 sf->graphset = PETSC_FALSE; 92 ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr); 93 ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr); 94 sf->nranks = -1; 95 ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr); 96 #if defined(PETSC_HAVE_CUDA) 97 { 98 PetscInt i; 99 for (i=0; i<2; i++) {if (sf->rmine_d[i]) {cudaError_t err = cudaFree(sf->rmine_d[i]);CHKERRCUDA(err);sf->rmine_d[i]=NULL;}} 100 } 101 #endif 102 sf->degreeknown = PETSC_FALSE; 103 ierr = PetscFree(sf->degree);CHKERRQ(ierr); 104 if (sf->ingroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);} 105 if (sf->outgroup != MPI_GROUP_NULL) {ierr = 
MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);} 106 ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr); 107 ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr); 108 sf->setupcalled = PETSC_FALSE; 109 PetscFunctionReturn(0); 110 } 111 112 /*@C 113 PetscSFSetType - Set the PetscSF communication implementation 114 115 Collective on PetscSF 116 117 Input Parameters: 118 + sf - the PetscSF context 119 - type - a known method 120 121 Options Database Key: 122 . -sf_type <type> - Sets the method; use -help for a list 123 of available methods (for instance, window, basic, neighbor) 124 125 Notes: 126 See "include/petscsf.h" for available methods (for instance) 127 + PETSCSFWINDOW - MPI-2/3 one-sided 128 - PETSCSFBASIC - basic implementation using MPI-1 two-sided 129 130 Level: intermediate 131 132 .seealso: PetscSFType, PetscSFCreate() 133 @*/ 134 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type) 135 { 136 PetscErrorCode ierr,(*r)(PetscSF); 137 PetscBool match; 138 139 PetscFunctionBegin; 140 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 141 PetscValidCharPointer(type,2); 142 143 ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr); 144 if (match) PetscFunctionReturn(0); 145 146 ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr); 147 if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type); 148 /* Destroy the previous PetscSF implementation context */ 149 if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);} 150 ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr); 151 ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr); 152 ierr = (*r)(sf);CHKERRQ(ierr); 153 PetscFunctionReturn(0); 154 } 155 156 /*@C 157 PetscSFGetType - Get the PetscSF communication implementation 158 159 Not Collective 160 161 Input Parameter: 162 . sf - the PetscSF context 163 164 Output Parameter: 165 . 
type - the PetscSF type name 166 167 Level: intermediate 168 169 .seealso: PetscSFSetType(), PetscSFCreate() 170 @*/ 171 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 172 { 173 PetscFunctionBegin; 174 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1); 175 PetscValidPointer(type,2); 176 *type = ((PetscObject)sf)->type_name; 177 PetscFunctionReturn(0); 178 } 179 180 /*@ 181 PetscSFDestroy - destroy star forest 182 183 Collective 184 185 Input Arguments: 186 . sf - address of star forest 187 188 Level: intermediate 189 190 .seealso: PetscSFCreate(), PetscSFReset() 191 @*/ 192 PetscErrorCode PetscSFDestroy(PetscSF *sf) 193 { 194 PetscErrorCode ierr; 195 196 PetscFunctionBegin; 197 if (!*sf) PetscFunctionReturn(0); 198 PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1); 199 if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);} 200 ierr = PetscSFReset(*sf);CHKERRQ(ierr); 201 if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);} 202 ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr); 203 PetscFunctionReturn(0); 204 } 205 206 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 207 { 208 #if defined(PETSC_USE_DEBUG) 209 PetscInt i, nleaves; 210 PetscMPIInt size; 211 const PetscInt *ilocal; 212 const PetscSFNode *iremote; 213 PetscErrorCode ierr; 214 215 PetscFunctionBegin; 216 if (!sf->graphset) PetscFunctionReturn(0); 217 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 218 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr); 219 for (i = 0; i < nleaves; i++) { 220 const PetscInt rank = iremote[i].rank; 221 const PetscInt remote = iremote[i].index; 222 const PetscInt leaf = ilocal ? 
ilocal[i] : i; 223 if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size); 224 if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i); 225 if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i); 226 } 227 PetscFunctionReturn(0); 228 #else 229 PetscFunctionBegin; 230 PetscFunctionReturn(0); 231 #endif 232 } 233 234 /*@ 235 PetscSFSetUp - set up communication structures 236 237 Collective 238 239 Input Arguments: 240 . sf - star forest communication object 241 242 Level: beginner 243 244 .seealso: PetscSFSetFromOptions(), PetscSFSetType() 245 @*/ 246 PetscErrorCode PetscSFSetUp(PetscSF sf) 247 { 248 PetscErrorCode ierr; 249 250 PetscFunctionBegin; 251 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 252 PetscSFCheckGraphSet(sf,1); 253 if (sf->setupcalled) PetscFunctionReturn(0); 254 ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr); 255 if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} 256 ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 257 if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);} 258 ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 259 sf->setupcalled = PETSC_TRUE; 260 PetscFunctionReturn(0); 261 } 262 263 /*@ 264 PetscSFSetFromOptions - set PetscSF options using the options database 265 266 Logically Collective 267 268 Input Arguments: 269 . sf - star forest 270 271 Options Database Keys: 272 + -sf_type - implementation type, see PetscSFSetType() 273 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 274 . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. 
SF will also 275 use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller. 276 - -sf_use_stream_aware_mpi - Assumes the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI. 277 278 Level: intermediate 279 @*/ 280 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 281 { 282 PetscSFType deft; 283 char type[256]; 284 PetscErrorCode ierr; 285 PetscBool flg; 286 287 PetscFunctionBegin; 288 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 289 ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr); 290 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 291 ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr); 292 ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr); 293 ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr); 294 sf->use_default_stream = PETSC_FALSE; 295 ierr = PetscOptionsBool("-sf_use_default_stream","Assume SF's input and output root/leafdata is computed on the default stream","PetscSFSetFromOptions",sf->use_default_stream,&sf->use_default_stream,NULL);CHKERRQ(ierr); 296 sf->use_stream_aware_mpi = PETSC_FALSE; 297 ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr); 298 if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);} 299 ierr = PetscOptionsEnd();CHKERRQ(ierr); 300 PetscFunctionReturn(0); 301 } 302 303 /*@ 304 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 305 306 Logically Collective 307 308 Input Arguments: 309 + sf - star forest 310 - flg - 
PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic) 311 312 Level: advanced 313 314 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin() 315 @*/ 316 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg) 317 { 318 PetscFunctionBegin; 319 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 320 PetscValidLogicalCollectiveBool(sf,flg,2); 321 if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 322 sf->rankorder = flg; 323 PetscFunctionReturn(0); 324 } 325 326 /*@ 327 PetscSFSetGraph - Set a parallel star forest 328 329 Collective 330 331 Input Arguments: 332 + sf - star forest 333 . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 334 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 335 . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced 336 during setup in debug mode) 337 . localmode - copy mode for ilocal 338 . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 339 during setup in debug mode) 340 - remotemode - copy mode for iremote 341 342 Level: intermediate 343 344 Notes: 345 In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode 346 347 Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they 348 encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not 349 needed 350 351 Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf 352 indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode). 
353 354 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph() 355 @*/ 356 PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode) 357 { 358 PetscErrorCode ierr; 359 360 PetscFunctionBegin; 361 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 362 if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4); 363 if (nleaves > 0) PetscValidPointer(iremote,6); 364 if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots); 365 if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves); 366 367 if (sf->nroots >= 0) { /* Reset only if graph already set */ 368 ierr = PetscSFReset(sf);CHKERRQ(ierr); 369 } 370 371 ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 372 373 sf->nroots = nroots; 374 sf->nleaves = nleaves; 375 376 if (nleaves && ilocal) { 377 PetscInt i; 378 PetscInt minleaf = PETSC_MAX_INT; 379 PetscInt maxleaf = PETSC_MIN_INT; 380 int contiguous = 1; 381 for (i=0; i<nleaves; i++) { 382 minleaf = PetscMin(minleaf,ilocal[i]); 383 maxleaf = PetscMax(maxleaf,ilocal[i]); 384 contiguous &= (ilocal[i] == i); 385 } 386 sf->minleaf = minleaf; 387 sf->maxleaf = maxleaf; 388 if (contiguous) { 389 if (localmode == PETSC_OWN_POINTER) { 390 ierr = PetscFree(ilocal);CHKERRQ(ierr); 391 } 392 ilocal = NULL; 393 } 394 } else { 395 sf->minleaf = 0; 396 sf->maxleaf = nleaves - 1; 397 } 398 399 if (ilocal) { 400 switch (localmode) { 401 case PETSC_COPY_VALUES: 402 ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr); 403 ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr); 404 sf->mine = sf->mine_alloc; 405 break; 406 case PETSC_OWN_POINTER: 407 sf->mine_alloc = (PetscInt*)ilocal; 408 sf->mine = sf->mine_alloc; 409 break; 410 case PETSC_USE_POINTER: 411 sf->mine_alloc = NULL; 412 sf->mine = (PetscInt*)ilocal; 413 break; 414 
default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode"); 415 } 416 } 417 418 switch (remotemode) { 419 case PETSC_COPY_VALUES: 420 ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr); 421 ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr); 422 sf->remote = sf->remote_alloc; 423 break; 424 case PETSC_OWN_POINTER: 425 sf->remote_alloc = (PetscSFNode*)iremote; 426 sf->remote = sf->remote_alloc; 427 break; 428 case PETSC_USE_POINTER: 429 sf->remote_alloc = NULL; 430 sf->remote = (PetscSFNode*)iremote; 431 break; 432 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode"); 433 } 434 435 ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 436 sf->graphset = PETSC_TRUE; 437 PetscFunctionReturn(0); 438 } 439 440 /*@ 441 PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern 442 443 Collective 444 445 Input Parameters: 446 + sf - The PetscSF 447 . map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL) 448 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL 449 450 Notes: 451 It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map. 452 n and N are local and global sizes of x respectively. 453 454 With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to 455 sequential vectors y on all ranks. 456 457 With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a 458 sequential vector y on rank 0. 459 460 In above cases, entries of x are roots and entries of y are leaves. 461 462 With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine 463 creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 464 of rank j. 
Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does 465 not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data 466 items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines. 467 468 In this case, roots and leaves are symmetric. 469 470 Level: intermediate 471 @*/ 472 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern) 473 { 474 MPI_Comm comm; 475 PetscInt n,N,res[2]; 476 PetscMPIInt rank,size; 477 PetscSFType type; 478 PetscErrorCode ierr; 479 480 PetscFunctionBegin; 481 ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr); 482 if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern); 483 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 484 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 485 486 if (pattern == PETSCSF_PATTERN_ALLTOALL) { 487 type = PETSCSFALLTOALL; 488 ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr); 489 ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr); 490 ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr); 491 ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr); 492 } else { 493 ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr); 494 ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr); 495 res[0] = n; 496 res[1] = -n; 497 /* Check if n are same over all ranks so that we can optimize it */ 498 ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 499 if (res[0] == -res[1]) { /* same n */ 500 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 501 } else { 502 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? 
PETSCSFALLGATHERV : PETSCSFGATHERV; 503 } 504 ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr); 505 } 506 ierr = PetscSFSetType(sf,type);CHKERRQ(ierr); 507 508 sf->pattern = pattern; 509 sf->mine = NULL; /* Contiguous */ 510 511 /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 512 Also set other easy stuff. 513 */ 514 if (pattern == PETSCSF_PATTERN_ALLGATHER) { 515 sf->nleaves = N; 516 sf->nroots = n; 517 sf->nranks = size; 518 sf->minleaf = 0; 519 sf->maxleaf = N - 1; 520 } else if (pattern == PETSCSF_PATTERN_GATHER) { 521 sf->nleaves = rank ? 0 : N; 522 sf->nroots = n; 523 sf->nranks = rank ? 0 : size; 524 sf->minleaf = 0; 525 sf->maxleaf = rank ? -1 : N - 1; 526 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 527 sf->nleaves = size; 528 sf->nroots = size; 529 sf->nranks = size; 530 sf->minleaf = 0; 531 sf->maxleaf = size - 1; 532 } 533 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 534 sf->graphset = PETSC_TRUE; 535 PetscFunctionReturn(0); 536 } 537 538 /*@ 539 PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map 540 541 Collective 542 543 Input Arguments: 544 545 . sf - star forest to invert 546 547 Output Arguments: 548 . isf - inverse of sf 549 Level: advanced 550 551 Notes: 552 All roots must have degree 1. 553 554 The local space may be a permutation, but cannot be sparse. 
555 556 .seealso: PetscSFSetGraph() 557 @*/ 558 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf) 559 { 560 PetscErrorCode ierr; 561 PetscMPIInt rank; 562 PetscInt i,nroots,nleaves,maxlocal,count,*newilocal; 563 const PetscInt *ilocal; 564 PetscSFNode *roots,*leaves; 565 566 PetscFunctionBegin; 567 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 568 PetscSFCheckGraphSet(sf,1); 569 PetscValidPointer(isf,2); 570 571 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 572 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 573 574 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 575 ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr); 576 for (i=0; i<maxlocal; i++) { 577 leaves[i].rank = rank; 578 leaves[i].index = i; 579 } 580 for (i=0; i <nroots; i++) { 581 roots[i].rank = -1; 582 roots[i].index = -1; 583 } 584 ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr); 585 ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr); 586 587 /* Check whether our leaves are sparse */ 588 for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++; 589 if (count == nroots) newilocal = NULL; 590 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). 
*/ 591 ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr); 592 for (i=0,count=0; i<nroots; i++) { 593 if (roots[i].rank >= 0) { 594 newilocal[count] = i; 595 roots[count].rank = roots[i].rank; 596 roots[count].index = roots[i].index; 597 count++; 598 } 599 } 600 } 601 602 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr); 603 ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr); 604 ierr = PetscFree2(roots,leaves);CHKERRQ(ierr); 605 PetscFunctionReturn(0); 606 } 607 608 /*@ 609 PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph 610 611 Collective 612 613 Input Arguments: 614 + sf - communication object to duplicate 615 - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption) 616 617 Output Arguments: 618 . newsf - new communication object 619 620 Level: beginner 621 622 .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph() 623 @*/ 624 PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf) 625 { 626 PetscSFType type; 627 PetscErrorCode ierr; 628 629 PetscFunctionBegin; 630 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 631 PetscValidLogicalCollectiveEnum(sf,opt,2); 632 PetscValidPointer(newsf,3); 633 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr); 634 ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 635 if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);} 636 if (opt == PETSCSF_DUPLICATE_GRAPH) { 637 PetscSFCheckGraphSet(sf,1); 638 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 639 PetscInt nroots,nleaves; 640 const PetscInt *ilocal; 641 const PetscSFNode *iremote; 642 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 643 ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr); 644 } else { 645 ierr = 
PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr); 646 } 647 } 648 if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);} 649 PetscFunctionReturn(0); 650 } 651 652 /*@C 653 PetscSFGetGraph - Get the graph specifying a parallel star forest 654 655 Not Collective 656 657 Input Arguments: 658 . sf - star forest 659 660 Output Arguments: 661 + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 662 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 663 . ilocal - locations of leaves in leafdata buffers 664 - iremote - remote locations of root vertices for each leaf on the current process 665 666 Notes: 667 We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet 668 669 When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you 670 want to update the graph, you must call PetscSFSetGraph after modifying the iremote array. 671 672 Level: intermediate 673 674 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph() 675 @*/ 676 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 677 { 678 PetscErrorCode ierr; 679 680 PetscFunctionBegin; 681 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 682 if (sf->ops->GetGraph) { 683 ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr); 684 } else { 685 if (nroots) *nroots = sf->nroots; 686 if (nleaves) *nleaves = sf->nleaves; 687 if (ilocal) *ilocal = sf->mine; 688 if (iremote) *iremote = sf->remote; 689 } 690 PetscFunctionReturn(0); 691 } 692 693 /*@ 694 PetscSFGetLeafRange - Get the active leaf ranges 695 696 Not Collective 697 698 Input Arguments: 699 . sf - star forest 700 701 Output Arguments: 702 + minleaf - minimum active leaf on this process. 
Return 0 if there are no leaves. 703 - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves. 704 705 Level: developer 706 707 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph() 708 @*/ 709 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf) 710 { 711 712 PetscFunctionBegin; 713 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 714 PetscSFCheckGraphSet(sf,1); 715 if (minleaf) *minleaf = sf->minleaf; 716 if (maxleaf) *maxleaf = sf->maxleaf; 717 PetscFunctionReturn(0); 718 } 719 720 /*@C 721 PetscSFViewFromOptions - View from Options 722 723 Collective on PetscSF 724 725 Input Parameters: 726 + A - the star forest 727 . obj - Optional object 728 - name - command line option 729 730 Level: intermediate 731 .seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate() 732 @*/ 733 PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[]) 734 { 735 PetscErrorCode ierr; 736 737 PetscFunctionBegin; 738 PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1); 739 ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr); 740 PetscFunctionReturn(0); 741 } 742 743 /*@C 744 PetscSFView - view a star forest 745 746 Collective 747 748 Input Arguments: 749 + sf - star forest 750 - viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD 751 752 Level: beginner 753 754 .seealso: PetscSFCreate(), PetscSFSetGraph() 755 @*/ 756 PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer) 757 { 758 PetscErrorCode ierr; 759 PetscBool iascii; 760 PetscViewerFormat format; 761 762 PetscFunctionBegin; 763 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 764 if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);} 765 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 766 PetscCheckSameComm(sf,1,viewer,2); 767 if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);} 768 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 769 if (iascii) { 770 PetscMPIInt rank; 771 PetscInt ii,i,j; 772 773 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr); 774 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 775 if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);} 776 if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 777 if (!sf->graphset) { 778 ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr); 779 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 780 PetscFunctionReturn(0); 781 } 782 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 783 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 784 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr); 785 for (i=0; i<sf->nleaves; i++) { 786 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? 
sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr); 787 } 788 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 789 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 790 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 791 PetscMPIInt *tmpranks,*perm; 792 ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr); 793 ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr); 794 for (i=0; i<sf->nranks; i++) perm[i] = i; 795 ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr); 796 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr); 797 for (ii=0; ii<sf->nranks; ii++) { 798 i = perm[ii]; 799 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr); 800 for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) { 801 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr); 802 } 803 } 804 ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr); 805 } 806 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 807 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 808 } 809 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 810 } 811 PetscFunctionReturn(0); 812 } 813 814 /*@C 815 PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process 816 817 Not Collective 818 819 Input Arguments: 820 . sf - star forest 821 822 Output Arguments: 823 + nranks - number of ranks referenced by local part 824 . ranks - array of ranks 825 . roffset - offset in rmine/rremote for each rank (length nranks+1) 826 . 
rmine - concatenated array holding local indices referencing each remote rank 827 - rremote - concatenated array holding remote indices referenced for each remote rank 828 829 Level: developer 830 831 .seealso: PetscSFGetLeafRanks() 832 @*/ 833 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote) 834 { 835 PetscErrorCode ierr; 836 837 PetscFunctionBegin; 838 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 839 if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 840 if (sf->ops->GetRootRanks) { 841 ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr); 842 } else { 843 /* The generic implementation */ 844 if (nranks) *nranks = sf->nranks; 845 if (ranks) *ranks = sf->ranks; 846 if (roffset) *roffset = sf->roffset; 847 if (rmine) *rmine = sf->rmine; 848 if (rremote) *rremote = sf->rremote; 849 } 850 PetscFunctionReturn(0); 851 } 852 853 /*@C 854 PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 855 856 Not Collective 857 858 Input Arguments: 859 . sf - star forest 860 861 Output Arguments: 862 + niranks - number of leaf ranks referencing roots on this process 863 . iranks - array of ranks 864 . 
ioffset - offset in irootloc for each rank (length niranks+1) 865 - irootloc - concatenated array holding local indices of roots referenced by each leaf rank 866 867 Level: developer 868 869 .seealso: PetscSFGetRootRanks() 870 @*/ 871 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 872 { 873 PetscErrorCode ierr; 874 875 PetscFunctionBegin; 876 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 877 if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 878 if (sf->ops->GetLeafRanks) { 879 ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr); 880 } else { 881 PetscSFType type; 882 ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 883 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 884 } 885 PetscFunctionReturn(0); 886 } 887 888 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) { 889 PetscInt i; 890 for (i=0; i<n; i++) { 891 if (needle == list[i]) return PETSC_TRUE; 892 } 893 return PETSC_FALSE; 894 } 895 896 /*@C 897 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations. 

   Collective

   Input Arguments:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  /* Count, per remote rank, how many of our leaves reference it */
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank (PetscTable cannot store key 0) */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--; /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group   group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate the distinguished group's ranks into this communicator's numbering */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i; ) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for ( ; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition separately, keeping the leaf counts attached to their ranks */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* Build CSR-style offsets into rmine/rremote, one contiguous span per remote rank */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0; /* reused below as a per-rank fill cursor */
  }
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit: consecutive leaves often reference the same rank, so reuse the last lookup */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks: first the distinguished prefix, then the rest */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Argument:
.  sf - star forest

   Output Arguments:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) { /* groups are computed lazily and cached on the SF */
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks: build an auxiliary SF with one root and one leaf per remote rank */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
    /* Enumerate the incoming ranks: each referencing process gathers its own rank to us */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* Outgoing ranks are exactly sf->ranks; no communication needed */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Argument:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Arguments:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.

.seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
    ierr   = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    *multi = sf->multi;
    PetscFunctionReturn(0);
  }
  if (!sf->multi) { /* build lazily and cache on the SF */
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    /* inoffset[] is the prefix sum of root degrees: the start of each root's span in the multi-root space */
    ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Each leaf atomically fetches its root's running offset and increments it by one,
       so every leaf obtains a distinct slot within its root's span */
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
#if defined(PETSC_USE_DEBUG) /* Check that the expected number of increments occurred */
    for (i=0; i<sf->nroots; i++) {
      if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
    }
#endif
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) {        /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      /* Reduce each leaf's owning rank onto its multi-root slot */
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell each leaf its new (rank-sorted) multi-root index, then rebuild the graph with it */
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
    }
    ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices

   Collective

   Input Arguments:
+  sf - original star forest
.  nselected  - number of selected roots on this process
-  selected   - indices of the selected roots on this process

   Output Arguments:
.  esf - new star forest

   Level: advanced

   Note:
   To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
   be done by calling PetscSFGetGraph().

.seealso: PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
{
  PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
  const PetscInt    *ilocal;
  signed char       *rootdata,*leafdata,*leafmem;
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  MPI_Comm          comm;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (nselected) PetscValidPointer(selected,3);
  PetscValidPointer(esf,4);

  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  /* Error out if selected[] has dups or out
of range indices */ 1200 { 1201 PetscBool dups; 1202 ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr); 1203 if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups"); 1204 for (i=0; i<nselected; i++) 1205 if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %D is out of [0,%D)",selected[i],nroots); 1206 } 1207 #endif 1208 1209 if (sf->ops->CreateEmbeddedSF) { 1210 ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,selected,esf);CHKERRQ(ierr); 1211 } else { 1212 /* A generic version of creating embedded sf */ 1213 ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 1214 maxlocal = maxleaf - minleaf + 1; 1215 ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr); 1216 leafdata = leafmem - minleaf; 1217 /* Tag selected roots and bcast to leaves */ 1218 for (i=0; i<nselected; i++) rootdata[selected[i]] = 1; 1219 ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr); 1220 ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr); 1221 1222 /* Build esf with leaves that are still connected */ 1223 esf_nleaves = 0; 1224 for (i=0; i<nleaves; i++) { 1225 j = ilocal ? ilocal[i] : i; 1226 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1227 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1228 */ 1229 esf_nleaves += (leafdata[j] ? 1 : 0); 1230 } 1231 ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr); 1232 ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr); 1233 for (i=n=0; i<nleaves; i++) { 1234 j = ilocal ? 
ilocal[i] : i; 1235 if (leafdata[j]) { 1236 new_ilocal[n] = j; 1237 new_iremote[n].rank = iremote[i].rank; 1238 new_iremote[n].index = iremote[i].index; 1239 ++n; 1240 } 1241 } 1242 ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr); 1243 ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr); 1244 ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1245 ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr); 1246 } 1247 ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1248 PetscFunctionReturn(0); 1249 } 1250 1251 /*@C 1252 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1253 1254 Collective 1255 1256 Input Arguments: 1257 + sf - original star forest 1258 . nselected - number of selected leaves on this process 1259 - selected - indices of the selected leaves on this process 1260 1261 Output Arguments: 1262 . newsf - new star forest 1263 1264 Level: advanced 1265 1266 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph() 1267 @*/ 1268 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1269 { 1270 const PetscSFNode *iremote; 1271 PetscSFNode *new_iremote; 1272 const PetscInt *ilocal; 1273 PetscInt i,nroots,*leaves,*new_ilocal; 1274 MPI_Comm comm; 1275 PetscErrorCode ierr; 1276 1277 PetscFunctionBegin; 1278 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1279 PetscSFCheckGraphSet(sf,1); 1280 if (nselected) PetscValidPointer(selected,3); 1281 PetscValidPointer(newsf,4); 1282 1283 /* Uniq selected[] and put results in leaves[] */ 1284 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1285 ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr); 1286 ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr); 1287 ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr); 1288 if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) 
SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);

  /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
  if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
    ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr);
  } else {
    /* Generic path: keep only the selected leaves, preserving their local and remote indices */
    ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
    ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
    for (i=0; i<nselected; ++i) {
      const PetscInt l = leaves[i];
      new_ilocal[i]        = ilocal ? ilocal[l] : l;
      new_iremote[i].rank  = iremote[l].rank;
      new_iremote[i].index = iremote[l].index;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  }
  ierr = PetscFree(leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd()

   Collective on PetscSF

   Input Arguments:
+  sf - star forest on which to communicate
.  unit - data type associated with each node
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Arguments:
.
leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Detect where the buffers live (host/device) so the implementation can pick the right path */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Arguments:
.
leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFReduceEnd()
@*/
PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Arguments:
.
rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFBcastBegin()
@*/
PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Detect where the buffers live (host/device) so the implementation can pick the right path */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  /* Explicit dereference of the function pointer, consistent with every other op dispatch in this file */
  ierr = (*sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Arguments:
.
rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
  ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Arguments:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
   integers.

.seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype,leafupdatemtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Detect where each buffer lives (host/device); leafdata and leafupdate must match */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
  if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
  ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.
leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Arguments:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    /* sf->degree non-NULL while degreeknown is false means a Begin without a matching End */
    if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf-sf->minleaf+1;
    ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* Each leaf contributes 1 to its root; the sum at each root is its degree */
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  /* Result is not available until PetscSFComputeDegreeEnd() */
  *degree = NULL;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFComputeDegreeBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    /* Cache the result: subsequent Begin/End pairs return sf->degree without communication */
    sf->degreeknown = PETSC_TRUE;
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}


/*@C
   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
   Each multi-root is assigned index of the corresponding original root.

   Collective

   Input Arguments:
+  sf - star forest
-  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

   Output Arguments:
+  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
-  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

   Level: developer

   Notes:
   The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.

.seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
@*/
PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
{
  PetscSF        msf;
  PetscInt       i, j, k, nroots, nmroots;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
  if (nroots) PetscValidIntPointer(degree,2);
  if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
  PetscValidPointer(multiRootsOrigNumbering,4);
  ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr);
  /* Each original root i with degree d owns d consecutive multi-roots; record i for each of them */
  for (i=0,j=0,k=0; i<nroots; i++) {
    if (!degree[i]) continue;
    for (j=0; j<degree[i]; j++,k++) {
      (*multiRootsOrigNumbering)[k] = i;
    }
  }
#if defined(PETSC_USE_DEBUG)
  if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
#endif
  if (nMultiRoots) *nMultiRoots = nmroots;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Argument:
.
multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1653 1654 Level: intermediate 1655 1656 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1657 @*/ 1658 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1659 { 1660 PetscErrorCode ierr; 1661 PetscSF multi; 1662 1663 PetscFunctionBegin; 1664 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1665 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1666 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1667 ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 1668 PetscFunctionReturn(0); 1669 } 1670 1671 /*@C 1672 PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin() 1673 1674 Collective 1675 1676 Input Arguments: 1677 + sf - star forest 1678 . unit - data type 1679 - leafdata - leaf data to gather to roots 1680 1681 Output Argument: 1682 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1683 1684 Level: intermediate 1685 1686 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1687 @*/ 1688 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1689 { 1690 PetscErrorCode ierr; 1691 PetscSF multi; 1692 1693 PetscFunctionBegin; 1694 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1695 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1696 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1697 ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 1698 PetscFunctionReturn(0); 1699 } 1700 1701 /*@C 1702 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd() 1703 1704 Collective 1705 1706 Input Arguments: 1707 + sf - star forest 1708 . unit - data type 1709 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1710 1711 Output Argument: 1712 . 
leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* A scatter is a broadcast over the multi-SF: each leaf receives from its personal multi-root slot */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFGatherEnd()
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Complete the multi-SF broadcast begun in PetscSFScatterBegin() */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Verify (debug builds only) that no leaf index appears more than once in sf's graph;
   errors out on duplicates. Compiles to a no-op in optimized builds. */
static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
{
#if defined(PETSC_USE_DEBUG)
  PetscInt        i, n, nleaves;
  const PetscInt *ilocal = NULL;
  PetscHSetI      seen;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  ierr = PetscHSetICreate(&seen);CHKERRQ(ierr);
  for (i = 0; i < nleaves; i++) {
    /* ilocal == NULL means contiguous leaf storage, i.e. leaf i has index i */
    const PetscInt leaf = ilocal ? ilocal[i] : i;
    ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
  }
  /* duplicates collapse in the set, so set size < nleaves exactly when a leaf repeats */
  ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
  if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
  ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
  PetscFunctionReturn(0);
#else
  PetscFunctionBegin;
  PetscFunctionReturn(0);
#endif
}

/*@
  PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots.

  sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
  nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
  Bcast on sfA, then a Bcast on sfB, on connected nodes.
.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode     ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt           i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool          denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  if (localPointsA) {
    /* Densify sfA's (leaf -> root) map onto sfB's root space (sfA leaf indices overlap sfB root
       indices, see Notes above); slots with no incoming sfA edge are marked rank = index = -1 */
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      /* sfA leaves beyond sfB's root space cannot be on a path through sfB */
      if (localPointsA[i] >= numRootsB) continue;
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  /* Broadcast sfA's root locations through sfB so each sfB leaf learns which sfA root (if any)
     it connects to; the buffer pointer is shifted by minleaf so it can be indexed by leaf number */
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* Count connected leaves; the result stays dense (ilocal == NULL) only if sfB's leaves are
     contiguous and every one of them received a connection */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    /* every sfB leaf is connected: hand leafdataB itself to the new SF (ownership transfers below) */
    localPointsBA  = NULL;
    remotePointsBA = leafdataB;
  } else {
    /* compact: keep only leaves whose broadcast slot was filled (rank != -1) */
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  /* PETSC_OWN_POINTER: the new SF takes ownership of localPointsBA and remotePointsBA */
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF.

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
  second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

  sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
  roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
  a Reduce on sfB, on connected roots.
.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode     ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt           i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op             op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool          iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPIU_REPLACE. In that case, MPI_MAXLOC and MPIU_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr);
  if (iswin) op = MPIU_REPLACE;
#endif

  /* Spread sfA's (leaf -> root) map over sfB's leaf range; slots with no sfA edge keep rank = index = -1 */
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      /* skip sfA leaves that fall outside sfB's leaf range */
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* contiguous sfA leaves: leaf number is simply i */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  /* initialize to the "unconnected" marker so untouched roots can be filtered out below */
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Reduce over sfB pushes each sfA root location from sfB's leaves up to sfB's roots;
     the source pointer is shifted by minleaf so it is indexed by leaf number */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* Compact in place: keep only sfB roots that received a connection (rank != -1) */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  /* PETSC_OWN_POINTER: the new SF takes ownership of localPointsBA and remotePointsBA */
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
. out - The local PetscSF
 */
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm           comm;
  PetscMPIInt        myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode       *liremote;
  PetscSF            lsf;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->CreateLocalSF) {
    /* type-specific implementation takes precedence */
    ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
  } else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr);

    /* Find out local edges and build a local SF */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
    /* first pass: count edges whose root lives on this rank */
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);

    /* second pass: copy intra-process edges, remapping the root rank to 0 (the only rank in PETSC_COMM_SELF) */
    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
    *out = lsf;
  }
  PetscFunctionReturn(0);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* NOTE(review): both the Begin and End log calls below use the PETSCSF_BcastAndOpBegin event --
     presumably because no dedicated BcastToZero event exists; confirm before changing */
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  if (sf->ops->BcastToZero) {
    ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}