#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/hashseti.h>
#include <petscctable.h>

/* Debug-only guard: raise an error if the graph of (sf) has not been set yet.
   (arg) is the 1-based position of sf in the caller's argument list, used only
   in the error message. In optimized builds this compiles to nothing. */
#if defined(PETSC_USE_DEBUG)
#  define PetscSFCheckGraphSet(sf,arg) do {                                                                                                                                             \
    if (PetscUnlikely(!(sf)->graphset))                                                                                                                                                 \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif

/* Printable names for the PetscSFDuplicateOption enum (used by options/enum machinery) */
const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0};

/*@
   PetscSFCreate - create a star forest communication context

   Collective

   Input Arguments:
.  comm - communicator on which the star forest will operate

   Output Arguments:
.  sf - new star forest context

   Options Database Keys:
+  -sf_type basic    - Use MPI persistent Isend/Irecv for communication (Default)
.  -sf_type window   - Use MPI-3 one-sided window for communication
-  -sf_type neighbor - Use MPI-3 neighborhood collectives for communication

   Level: intermediate

   Notes:
   When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
   MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
   SFs are optimized and they have better performance than general SFs.
.seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
{
  PetscErrorCode ierr;
  PetscSF        b;

  PetscFunctionBegin;
  PetscValidPointer(sf,2);
  ierr = PetscSFInitializePackage();CHKERRQ(ierr);

  ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);

  /* Sentinel values: sizes/ranks are unknown until PetscSFSetGraph() (or a
     pattern variant) is called; min/max leaf bounds start inverted so the
     first graph update narrows them correctly. */
  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_MAX_INT;
  b->maxleaf   = PETSC_MIN_INT;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;

  *sf = b;
  PetscFunctionReturn(0);
}

/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Arguments:
.  sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the implementation release its own state first, then free the
     graph storage and every piece of derived data so a new graph can be set. */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFSetType - Set the PetscSF communication implementation

   Collective on PetscSF

   Input Parameters:
+  sf - the PetscSF context
-  type - a known method

   Options Database Key:
.  -sf_type <type> - Sets the method; use -help for a list
   of available methods (for instance, window, pt2pt, neighbor)

   Notes:
   See "include/petscsf.h" for available methods (for instance)
+    PETSCSFWINDOW - MPI-2/3 one-sided
-    PETSCSFBASIC - basic implementation using MPI-1 two-sided

  Level: intermediate

.seealso: PetscSFType, PetscSFCreate()
@*/
PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
{
  PetscErrorCode ierr,(*r)(PetscSF);
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidCharPointer(type,2);

  /* No-op when the requested type is already active */
  ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
  if (match) PetscFunctionReturn(0);

  ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
  if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
  /* Destroy the previous PetscSF implementation context */
  if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
  /* Clear the ops table so stale callbacks from the old type cannot fire;
     the new type's constructor (*r) fills it back in. */
  ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
  ierr = (*r)(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
  PetscSFGetType - Get the PetscSF communication implementation

  Not Collective

  Input Parameter:
. sf - the PetscSF context

  Output Parameter:
.
type - the PetscSF type name

  Level: intermediate

.seealso: PetscSFSetType(), PetscSFCreate()
@*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
  PetscValidPointer(type,2);
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}

/*@
   PetscSFDestroy - destroy star forest

   Collective

   Input Arguments:
.  sf - address of star forest

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFReset()
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Drop one reference; only the final reference actually destroys the object */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Debug-only sanity check of the graph: every remote rank must be a valid
   communicator rank and all root/leaf indices must be non-negative.
   A no-op in optimized builds. */
static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
{
#if defined(PETSC_USE_DEBUG)
  PetscInt           i, nleaves;
  PetscMPIInt        size;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  if (!sf->graphset) PetscFunctionReturn(0);
  ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  for (i = 0; i < nleaves; i++) {
    const PetscInt rank   = iremote[i].rank;
    const PetscInt remote = iremote[i].index;
    const PetscInt leaf   = ilocal ? ilocal[i] : i; /* NULL ilocal means contiguous leaves */
    if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
    if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
    if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
  }
  PetscFunctionReturn(0);
#else
  PetscFunctionBegin;
  PetscFunctionReturn(0);
#endif
}

/*@
   PetscSFSetUp - set up communication structures

   Collective

   Input Arguments:
.  sf - star forest communication object

   Level: beginner

.seealso: PetscSFSetFromOptions(), PetscSFSetType()
@*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (sf->setupcalled) PetscFunctionReturn(0); /* idempotent */
  ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr);
  /* Default to the basic (two-sided) implementation when no type was chosen */
  if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);}
  ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
  ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetFromOptions - set PetscSF options using the options database

   Logically Collective

   Input Arguments:
.
sf - star forest

   Options Database Keys:
+  -sf_type - implementation type, see PetscSFSetType()
-  -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise

   Level: intermediate

.seealso: PetscSFWindowSetSyncType()
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* NOTE: PetscObjectOptionsBegin declares PetscOptionsObject used below */
  ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
  if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

   Logically Collective

   Input Arguments:
+  sf - star forest
-  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

   Level: advanced

.seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  /* sf->multi is created lazily by gather/scatter; once it exists the ordering is baked in */
  if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Arguments:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
during setup in debug mode)
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
during setup in debug mode)
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
   In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode

   Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
   encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
   needed

   Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
   indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).
.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
  if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

  /* Discard any previous graph and derived data before installing the new one */
  ierr = PetscSFReset(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  if (nleaves && ilocal) {
    PetscInt i;
    PetscInt minleaf = PETSC_MAX_INT;
    PetscInt maxleaf = PETSC_MIN_INT;
    int      contiguous = 1;
    for (i=0; i<nleaves; i++) {
      minleaf = PetscMin(minleaf,ilocal[i]);
      maxleaf = PetscMax(maxleaf,ilocal[i]);
      contiguous &= (ilocal[i] == i);
    }
    sf->minleaf = minleaf;
    sf->maxleaf = maxleaf;
    /* An identity permutation encodes contiguous leaf storage: drop ilocal
       (freeing it if ownership was transferred to us) and store NULL instead */
    if (contiguous) {
      if (localmode == PETSC_OWN_POINTER) {
        ierr = PetscFree(ilocal);CHKERRQ(ierr);
      }
      ilocal = NULL;
    }
  } else {
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
  }

  if (ilocal) {
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
      sf->mine = sf->mine_alloc;
      break;
    case PETSC_OWN_POINTER:
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      /* Caller retains ownership; mine_alloc stays NULL so PetscSFReset() will not free it */
      sf->mine_alloc = NULL;
      sf->mine       = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }

  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    sf->remote_alloc = NULL;
    sf->remote       = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
  PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

  Collective

  Input Parameters:
+ sf      - The PetscSF
. map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
- pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

  Notes:
  It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
  n and N are local and global sizes of x respectively.

  With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
  sequential vectors y on all ranks.

  With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
  sequential vector y on rank 0.

  In above cases, entries of x are roots and entries of y are leaves.

  With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
  creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
  of rank j.
Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
  not mean one can not send multiple items. One just needs to create a new MPI datatype for the multiple data
  items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

  In this case, roots and leaves are symmetric.

  Level: intermediate
@*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* map is ignored for alltoall; build a size-by-size^2 layout internally */
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots  = n;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves in a gather */
    sf->nleaves = rank ? 0 : N;
    sf->nroots  = n;
    sf->nranks  = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves = size;
    sf->nroots  = size;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks  = 0;  /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

   Collective

   Input Arguments:

.  sf - star forest to invert

   Output Arguments:
.  isf - inverse of sf
   Level: advanced

   Notes:
   All roots must have degree 1.

   The local space may be a permutation, but cannot be sparse.
.seealso: PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  /* Each leaf advertises its own (rank,index); reducing with REPLACE writes
     that address onto the (degree-1) root it points at, producing the inverse edges */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* Roots not reached by any leaf keep (-1,-1) so they can be detected below */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL;
  else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Arguments:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Arguments:
.  newsf - new communication object

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      ierr =
PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
  /* Give the implementation a chance to copy its private data */
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGraph - Get the graph specifying a parallel star forest

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers
-  iremote - remote locations of root vertices for each leaf on the current process

   Notes:
   We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Pattern-based implementations may synthesize the graph on demand */
  if (sf->ops->GetGraph) {
    ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
  } else {
    if (nroots) *nroots = sf->nroots;
    if (nleaves) *nleaves = sf->nleaves;
    if (ilocal) *ilocal = sf->mine;
    if (iremote) *iremote = sf->remote;
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetLeafRange - Get the active leaf ranges

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
-  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.

   Level: developer

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
{

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFView - view a star forest

   Collective

   Input Arguments:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      ierr =
PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        /* Sort a copy of the rank list so output is in ascending rank order */
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
.  roffset - offset in rmine/rremote for each rank (length nranks+1)
.  rmine - concatenated array holding local indices referencing each remote rank
-  rremote - concatenated array holding remote indices referenced for each remote rank

   Level: developer

.seealso: PetscSFGetLeafRanks()
@*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetRootRanks) {
    ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
  } else {
    /* The generic implementation */
    if (nranks) *nranks = sf->nranks;
    if (ranks) *ranks = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine) *rmine = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  niranks - number of leaf ranks referencing roots on this process
.  iranks - array of ranks
.
ioffset - offset in irootloc for each rank (length niranks+1) 826 - irootloc - concatenated array holding local indices of roots referenced by each leaf rank 827 828 Level: developer 829 830 .seealso: PetscSFGetRootRanks() 831 @*/ 832 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 833 { 834 PetscErrorCode ierr; 835 836 PetscFunctionBegin; 837 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 838 if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 839 if (sf->ops->GetLeafRanks) { 840 ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr); 841 } else { 842 PetscSFType type; 843 ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 844 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 845 } 846 PetscFunctionReturn(0); 847 } 848 849 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) { 850 PetscInt i; 851 for (i=0; i<n; i++) { 852 if (needle == list[i]) return PETSC_TRUE; 853 } 854 return PETSC_FALSE; 855 } 856 857 /*@C 858 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations. 
   Collective

   Input Arguments:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  /* Count the leaves that point at each remote rank using a hash table */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--; /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group   group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate the distinguished group's ranks into this communicator's numbering */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i; ) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for ( ; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition separately so distinguished ranks stay in front */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0; /* reused below as a per-rank fill cursor */
  }
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0)
SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank); 953 sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i; 954 sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index; 955 rcount[irank]++; 956 } 957 ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr); 958 PetscFunctionReturn(0); 959 } 960 961 /*@C 962 PetscSFGetGroups - gets incoming and outgoing process groups 963 964 Collective 965 966 Input Argument: 967 . sf - star forest 968 969 Output Arguments: 970 + incoming - group of origin processes for incoming edges (leaves that reference my roots) 971 - outgoing - group of destination processes for outgoing edges (roots that I reference) 972 973 Level: developer 974 975 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow() 976 @*/ 977 PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing) 978 { 979 PetscErrorCode ierr; 980 MPI_Group group = MPI_GROUP_NULL; 981 982 PetscFunctionBegin; 983 if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining groups"); 984 if (sf->ingroup == MPI_GROUP_NULL) { 985 PetscInt i; 986 const PetscInt *indegree; 987 PetscMPIInt rank,*outranks,*inranks; 988 PetscSFNode *remote; 989 PetscSF bgcount; 990 991 /* Compute the number of incoming ranks */ 992 ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr); 993 for (i=0; i<sf->nranks; i++) { 994 remote[i].rank = sf->ranks[i]; 995 remote[i].index = 0; 996 } 997 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr); 998 ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 999 ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr); 1000 ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr); 1001 1002 /* Enumerate the incoming ranks */ 1003 ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr); 1004 ierr = 
MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 1005 for (i=0; i<sf->nranks; i++) outranks[i] = rank; 1006 ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr); 1007 ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr); 1008 ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr); 1009 ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr); 1010 ierr = MPI_Group_free(&group);CHKERRQ(ierr); 1011 ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr); 1012 ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr); 1013 } 1014 *incoming = sf->ingroup; 1015 1016 if (sf->outgroup == MPI_GROUP_NULL) { 1017 ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr); 1018 ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr); 1019 ierr = MPI_Group_free(&group);CHKERRQ(ierr); 1020 } 1021 *outgoing = sf->outgroup; 1022 PetscFunctionReturn(0); 1023 } 1024 1025 /*@ 1026 PetscSFGetMultiSF - gets the inner SF implemeting gathers and scatters 1027 1028 Collective 1029 1030 Input Argument: 1031 . sf - star forest that may contain roots with 0 or with more than 1 vertex 1032 1033 Output Arguments: 1034 . multi - star forest with split roots, such that each root has degree exactly 1 1035 1036 Level: developer 1037 1038 Notes: 1039 1040 In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi 1041 directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming 1042 edge, it is a candidate for future optimization that might involve its removal. 
1043 1044 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering() 1045 @*/ 1046 PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi) 1047 { 1048 PetscErrorCode ierr; 1049 1050 PetscFunctionBegin; 1051 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1052 PetscValidPointer(multi,2); 1053 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */ 1054 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1055 *multi = sf->multi; 1056 PetscFunctionReturn(0); 1057 } 1058 if (!sf->multi) { 1059 const PetscInt *indegree; 1060 PetscInt i,*inoffset,*outones,*outoffset,maxlocal; 1061 PetscSFNode *remote; 1062 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 1063 ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr); 1064 ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr); 1065 ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr); 1066 inoffset[0] = 0; 1067 for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i]; 1068 for (i=0; i<maxlocal; i++) outones[i] = 1; 1069 ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1070 ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1071 for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1072 #if 0 1073 #if defined(PETSC_USE_DEBUG) /* Check that the expected number of increments occurred */ 1074 for (i=0; i<sf->nroots; i++) { 1075 if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp"); 1076 } 1077 #endif 1078 #endif 1079 ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr); 1080 for (i=0; i<sf->nleaves; i++) { 1081 remote[i].rank = sf->remote[i].rank; 1082 remote[i].index = outoffset[sf->mine ? 
sf->mine[i] : i]; 1083 } 1084 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1085 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1086 if (sf->rankorder) { /* Sort the ranks */ 1087 PetscMPIInt rank; 1088 PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree; 1089 PetscSFNode *newremote; 1090 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 1091 for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]); 1092 ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr); 1093 for (i=0; i<maxlocal; i++) outranks[i] = rank; 1094 ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr); 1095 ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr); 1096 /* Sort the incoming ranks at each vertex, build the inverse map */ 1097 for (i=0; i<sf->nroots; i++) { 1098 PetscInt j; 1099 for (j=0; j<indegree[i]; j++) tmpoffset[j] = j; 1100 ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr); 1101 for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1102 } 1103 ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr); 1104 ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr); 1105 ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr); 1106 for (i=0; i<sf->nleaves; i++) { 1107 newremote[i].rank = sf->remote[i].rank; 1108 newremote[i].index = newoutoffset[sf->mine ? 
sf->mine[i] : i]; 1109 } 1110 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1111 ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr); 1112 } 1113 ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr); 1114 } 1115 *multi = sf->multi; 1116 PetscFunctionReturn(0); 1117 } 1118 1119 /*@C 1120 PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices 1121 1122 Collective 1123 1124 Input Arguments: 1125 + sf - original star forest 1126 . nselected - number of selected roots on this process 1127 - selected - indices of the selected roots on this process 1128 1129 Output Arguments: 1130 . newsf - new star forest 1131 1132 Level: advanced 1133 1134 Note: 1135 To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can 1136 be done by calling PetscSFGetGraph(). 1137 1138 .seealso: PetscSFSetGraph(), PetscSFGetGraph() 1139 @*/ 1140 PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1141 { 1142 PetscInt i,n,*roots,*rootdata,*leafdata,nroots,nleaves,connected_leaves,*new_ilocal; 1143 const PetscSFNode *iremote; 1144 PetscSFNode *new_iremote; 1145 PetscSF tmpsf; 1146 MPI_Comm comm; 1147 PetscErrorCode ierr; 1148 1149 PetscFunctionBegin; 1150 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1151 PetscSFCheckGraphSet(sf,1); 1152 if (nselected) PetscValidPointer(selected,3); 1153 PetscValidPointer(newsf,4); 1154 1155 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1156 1157 /* Uniq selected[] and put results in roots[] */ 1158 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1159 ierr = PetscMalloc1(nselected,&roots);CHKERRQ(ierr); 1160 ierr = PetscArraycpy(roots,selected,nselected);CHKERRQ(ierr); 1161 ierr = PetscSortedRemoveDupsInt(&nselected,roots);CHKERRQ(ierr); 1162 if (nselected && (roots[0] < 0 
|| roots[nselected-1] >= sf->nroots)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %D/%D are not in [0,%D)",roots[0],roots[nselected-1],sf->nroots); 1163 1164 if (sf->ops->CreateEmbeddedSF) { 1165 ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,roots,newsf);CHKERRQ(ierr); 1166 } else { 1167 /* A generic version of creating embedded sf. Note that we called PetscSFSetGraph() twice, which is certainly expensive */ 1168 /* Find out which leaves (not leaf data items) are still connected to roots in the embedded sf */ 1169 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,NULL,&iremote);CHKERRQ(ierr); 1170 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&tmpsf);CHKERRQ(ierr); 1171 ierr = PetscSFSetGraph(tmpsf,nroots,nleaves,NULL/*contiguous*/,PETSC_USE_POINTER,iremote,PETSC_USE_POINTER);CHKERRQ(ierr); 1172 ierr = PetscCalloc2(nroots,&rootdata,nleaves,&leafdata);CHKERRQ(ierr); 1173 for (i=0; i<nselected; ++i) rootdata[roots[i]] = 1; 1174 ierr = PetscSFBcastBegin(tmpsf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr); 1175 ierr = PetscSFBcastEnd(tmpsf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr); 1176 ierr = PetscSFDestroy(&tmpsf);CHKERRQ(ierr); 1177 1178 /* Build newsf with leaves that are still connected */ 1179 connected_leaves = 0; 1180 for (i=0; i<nleaves; ++i) connected_leaves += leafdata[i]; 1181 ierr = PetscMalloc1(connected_leaves,&new_ilocal);CHKERRQ(ierr); 1182 ierr = PetscMalloc1(connected_leaves,&new_iremote);CHKERRQ(ierr); 1183 for (i=0, n=0; i<nleaves; ++i) { 1184 if (leafdata[i]) { 1185 new_ilocal[n] = sf->mine ? 
sf->mine[i] : i; 1186 new_iremote[n].rank = sf->remote[i].rank; 1187 new_iremote[n].index = sf->remote[i].index; 1188 ++n; 1189 } 1190 } 1191 1192 if (n != connected_leaves) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"There is a size mismatch in the SF embedding, %d != %d",n,connected_leaves); 1193 ierr = PetscSFCreate(comm,newsf);CHKERRQ(ierr); 1194 ierr = PetscSFSetFromOptions(*newsf);CHKERRQ(ierr); 1195 ierr = PetscSFSetGraph(*newsf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1196 ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr); 1197 } 1198 ierr = PetscFree(roots);CHKERRQ(ierr); 1199 PetscFunctionReturn(0); 1200 } 1201 1202 /*@C 1203 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1204 1205 Collective 1206 1207 Input Arguments: 1208 + sf - original star forest 1209 . nselected - number of selected leaves on this process 1210 - selected - indices of the selected leaves on this process 1211 1212 Output Arguments: 1213 . 
newsf - new star forest 1214 1215 Level: advanced 1216 1217 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph() 1218 @*/ 1219 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1220 { 1221 const PetscSFNode *iremote; 1222 PetscSFNode *new_iremote; 1223 const PetscInt *ilocal; 1224 PetscInt i,nroots,*leaves,*new_ilocal; 1225 MPI_Comm comm; 1226 PetscErrorCode ierr; 1227 1228 PetscFunctionBegin; 1229 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1230 PetscSFCheckGraphSet(sf,1); 1231 if (nselected) PetscValidPointer(selected,3); 1232 PetscValidPointer(newsf,4); 1233 1234 /* Uniq selected[] and put results in leaves[] */ 1235 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1236 ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr); 1237 ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr); 1238 ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr); 1239 if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves); 1240 1241 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1242 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) { 1243 ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr); 1244 } else { 1245 ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr); 1246 ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr); 1247 ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr); 1248 for (i=0; i<nselected; ++i) { 1249 const PetscInt l = leaves[i]; 1250 new_ilocal[i] = ilocal ? 
ilocal[l] : l; 1251 new_iremote[i].rank = iremote[l].rank; 1252 new_iremote[i].index = iremote[l].index; 1253 } 1254 ierr = PetscSFCreate(comm,newsf);CHKERRQ(ierr); 1255 ierr = PetscSFSetFromOptions(*newsf);CHKERRQ(ierr); 1256 ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1257 } 1258 ierr = PetscFree(leaves);CHKERRQ(ierr); 1259 PetscFunctionReturn(0); 1260 } 1261 1262 /*@C 1263 PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd() 1264 1265 Collective on PetscSF 1266 1267 Input Arguments: 1268 + sf - star forest on which to communicate 1269 . unit - data type associated with each node 1270 . rootdata - buffer to broadcast 1271 - op - operation to use for reduction 1272 1273 Output Arguments: 1274 . leafdata - buffer to be reduced with values from each leaf's respective root 1275 1276 Level: intermediate 1277 1278 .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd() 1279 @*/ 1280 PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1281 { 1282 PetscErrorCode ierr; 1283 1284 PetscFunctionBegin; 1285 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1286 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1287 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1288 ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr); 1289 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1290 PetscFunctionReturn(0); 1291 } 1292 1293 /*@C 1294 PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin() 1295 1296 Collective 1297 1298 Input Arguments: 1299 + sf - star forest 1300 . unit - data type 1301 . rootdata - buffer to broadcast 1302 - op - operation to use for reduction 1303 1304 Output Arguments: 1305 . 
leafdata - buffer to be reduced with values from each leaf's respective root 1306 1307 Level: intermediate 1308 1309 .seealso: PetscSFSetGraph(), PetscSFReduceEnd() 1310 @*/ 1311 PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1312 { 1313 PetscErrorCode ierr; 1314 1315 PetscFunctionBegin; 1316 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1317 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1318 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1319 ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr); 1320 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1321 PetscFunctionReturn(0); 1322 } 1323 1324 /*@C 1325 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 1326 1327 Collective 1328 1329 Input Arguments: 1330 + sf - star forest 1331 . unit - data type 1332 . leafdata - values to reduce 1333 - op - reduction operation 1334 1335 Output Arguments: 1336 . rootdata - result of reduction of values from all leaves of each root 1337 1338 Level: intermediate 1339 1340 .seealso: PetscSFBcastBegin() 1341 @*/ 1342 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1343 { 1344 PetscErrorCode ierr; 1345 1346 PetscFunctionBegin; 1347 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1348 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1349 ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1350 ierr = (sf->ops->ReduceBegin)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr); 1351 ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1352 PetscFunctionReturn(0); 1353 } 1354 1355 /*@C 1356 PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin() 1357 1358 Collective 1359 1360 Input Arguments: 1361 + sf - star forest 1362 . unit - data type 1363 . 
leafdata - values to reduce 1364 - op - reduction operation 1365 1366 Output Arguments: 1367 . rootdata - result of reduction of values from all leaves of each root 1368 1369 Level: intermediate 1370 1371 .seealso: PetscSFSetGraph(), PetscSFBcastEnd() 1372 @*/ 1373 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1374 { 1375 PetscErrorCode ierr; 1376 1377 PetscFunctionBegin; 1378 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1379 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1380 ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr); 1381 ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr); 1382 ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr); 1383 PetscFunctionReturn(0); 1384 } 1385 1386 /*@C 1387 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd() 1388 1389 Collective 1390 1391 Input Arguments: 1392 . sf - star forest 1393 1394 Output Arguments: 1395 . degree - degree of each root vertex 1396 1397 Level: advanced 1398 1399 Notes: 1400 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 
1401 1402 .seealso: PetscSFGatherBegin() 1403 @*/ 1404 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree) 1405 { 1406 PetscErrorCode ierr; 1407 1408 PetscFunctionBegin; 1409 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1410 PetscSFCheckGraphSet(sf,1); 1411 PetscValidPointer(degree,2); 1412 if (!sf->degreeknown) { 1413 PetscInt i, nroots = sf->nroots, maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 1414 if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1415 ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr); 1416 ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1417 for (i=0; i<nroots; i++) sf->degree[i] = 0; 1418 for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1; 1419 ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr); 1420 } 1421 *degree = NULL; 1422 PetscFunctionReturn(0); 1423 } 1424 1425 /*@C 1426 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin() 1427 1428 Collective 1429 1430 Input Arguments: 1431 . sf - star forest 1432 1433 Output Arguments: 1434 . degree - degree of each root vertex 1435 1436 Level: developer 1437 1438 Notes: 1439 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 
1440 1441 .seealso: 1442 @*/ 1443 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree) 1444 { 1445 PetscErrorCode ierr; 1446 1447 PetscFunctionBegin; 1448 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1449 PetscSFCheckGraphSet(sf,1); 1450 PetscValidPointer(degree,2); 1451 if (!sf->degreeknown) { 1452 if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 1453 ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr); 1454 ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr); 1455 sf->degreeknown = PETSC_TRUE; 1456 } 1457 *degree = sf->degree; 1458 PetscFunctionReturn(0); 1459 } 1460 1461 1462 /*@C 1463 PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()). 1464 Each multi-root is assigned index of the corresponding original root. 1465 1466 Collective 1467 1468 Input Arguments: 1469 + sf - star forest 1470 - degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd() 1471 1472 Output Arguments: 1473 + nMultiRoots - (optional) number of multi-roots (roots of multi-SF) 1474 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots 1475 1476 Level: developer 1477 1478 Notes: 1479 The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed. 
1480 1481 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF() 1482 @*/ 1483 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1484 { 1485 PetscSF msf; 1486 PetscInt i, j, k, nroots, nmroots; 1487 PetscErrorCode ierr; 1488 1489 PetscFunctionBegin; 1490 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1491 ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 1492 if (nroots) PetscValidIntPointer(degree,2); 1493 if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3); 1494 PetscValidPointer(multiRootsOrigNumbering,4); 1495 ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr); 1496 ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr); 1497 ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr); 1498 for (i=0,j=0,k=0; i<nroots; i++) { 1499 if (!degree[i]) continue; 1500 for (j=0; j<degree[i]; j++,k++) { 1501 (*multiRootsOrigNumbering)[k] = i; 1502 } 1503 } 1504 #if defined(PETSC_USE_DEBUG) 1505 if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail"); 1506 #endif 1507 if (nMultiRoots) *nMultiRoots = nmroots; 1508 PetscFunctionReturn(0); 1509 } 1510 1511 /*@C 1512 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd() 1513 1514 Collective 1515 1516 Input Arguments: 1517 + sf - star forest 1518 . unit - data type 1519 . leafdata - leaf values to use in reduction 1520 - op - operation to use for reduction 1521 1522 Output Arguments: 1523 + rootdata - root values to be updated, input state is seen by first process to perform an update 1524 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1525 1526 Level: advanced 1527 1528 Note: 1529 The update is only atomic at the granularity provided by the hardware. 
Different roots referenced by the same process 1530 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1531 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1532 integers. 1533 1534 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph() 1535 @*/ 1536 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1537 { 1538 PetscErrorCode ierr; 1539 1540 PetscFunctionBegin; 1541 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1542 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1543 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1544 ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr); 1545 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1546 PetscFunctionReturn(0); 1547 } 1548 1549 /*@C 1550 PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value 1551 1552 Collective 1553 1554 Input Arguments: 1555 + sf - star forest 1556 . unit - data type 1557 . 
leafdata - leaf values to use in reduction 1558 - op - operation to use for reduction 1559 1560 Output Arguments: 1561 + rootdata - root values to be updated, input state is seen by first process to perform an update 1562 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1563 1564 Level: advanced 1565 1566 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph() 1567 @*/ 1568 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1569 { 1570 PetscErrorCode ierr; 1571 1572 PetscFunctionBegin; 1573 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1574 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1575 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1576 ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr); 1577 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1578 PetscFunctionReturn(0); 1579 } 1580 1581 /*@C 1582 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 1583 1584 Collective 1585 1586 Input Arguments: 1587 + sf - star forest 1588 . unit - data type 1589 - leafdata - leaf data to gather to roots 1590 1591 Output Argument: 1592 . 
multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1593 1594 Level: intermediate 1595 1596 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1597 @*/ 1598 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1599 { 1600 PetscErrorCode ierr; 1601 PetscSF multi; 1602 1603 PetscFunctionBegin; 1604 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1605 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1606 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1607 ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 1608 PetscFunctionReturn(0); 1609 } 1610 1611 /*@C 1612 PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin() 1613 1614 Collective 1615 1616 Input Arguments: 1617 + sf - star forest 1618 . unit - data type 1619 - leafdata - leaf data to gather to roots 1620 1621 Output Argument: 1622 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1623 1624 Level: intermediate 1625 1626 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1627 @*/ 1628 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1629 { 1630 PetscErrorCode ierr; 1631 PetscSF multi; 1632 1633 PetscFunctionBegin; 1634 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1635 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1636 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1637 ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 1638 PetscFunctionReturn(0); 1639 } 1640 1641 /*@C 1642 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd() 1643 1644 Collective 1645 1646 Input Arguments: 1647 + sf - star forest 1648 . unit - data type 1649 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1650 1651 Output Argument: 1652 . 
leafdata - leaf data to be update with personal data from each respective root 1653 1654 Level: intermediate 1655 1656 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1657 @*/ 1658 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1659 { 1660 PetscErrorCode ierr; 1661 PetscSF multi; 1662 1663 PetscFunctionBegin; 1664 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1665 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1666 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1667 ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr); 1668 PetscFunctionReturn(0); 1669 } 1670 1671 /*@C 1672 PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin() 1673 1674 Collective 1675 1676 Input Arguments: 1677 + sf - star forest 1678 . unit - data type 1679 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1680 1681 Output Argument: 1682 . leafdata - leaf data to be update with personal data from each respective root 1683 1684 Level: intermediate 1685 1686 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1687 @*/ 1688 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1689 { 1690 PetscErrorCode ierr; 1691 PetscSF multi; 1692 1693 PetscFunctionBegin; 1694 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1695 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1696 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1697 ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr); 1698 PetscFunctionReturn(0); 1699 } 1700 1701 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 1702 { 1703 #if defined(PETSC_USE_DEBUG) 1704 PetscInt i, n, nleaves; 1705 const PetscInt *ilocal = NULL; 1706 PetscHSetI seen; 1707 PetscErrorCode ierr; 1708 1709 PetscFunctionBegin; 1710 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 1711 ierr = 
PetscHSetICreate(&seen);CHKERRQ(ierr); 1712 for (i = 0; i < nleaves; i++) { 1713 const PetscInt leaf = ilocal ? ilocal[i] : i; 1714 ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr); 1715 } 1716 ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr); 1717 if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique"); 1718 ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr); 1719 PetscFunctionReturn(0); 1720 #else 1721 PetscFunctionBegin; 1722 PetscFunctionReturn(0); 1723 #endif 1724 } 1725 /*@ 1726 PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view 1727 1728 Input Parameters: 1729 + sfA - The first PetscSF, whose local space may be a permutation, but can not be sparse. 1730 - sfB - The second PetscSF, whose number of roots must be equal to number of leaves of sfA on each processor 1731 1732 Output Parameters: 1733 . sfBA - The composite SF. Doing a Bcast on the new SF is equvalent to doing Bcast on sfA, then Bcast on sfB 1734 1735 Level: developer 1736 1737 Notes: 1738 For the resulting composed SF to be valid, the input SFs must be true star forests: the leaves must be unique. This is 1739 checked in debug mode. 
1740 1741 .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph() 1742 @*/ 1743 PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA) 1744 { 1745 PetscErrorCode ierr; 1746 MPI_Comm comm; 1747 const PetscSFNode *remotePointsA,*remotePointsB; 1748 PetscSFNode *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB; 1749 const PetscInt *localPointsA,*localPointsB,*localPointsBA; 1750 PetscInt i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf; 1751 1752 PetscFunctionBegin; 1753 PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1); 1754 PetscSFCheckGraphSet(sfA,1); 1755 PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2); 1756 PetscSFCheckGraphSet(sfB,2); 1757 PetscValidPointer(sfBA,3); 1758 ierr = PetscObjectGetComm((PetscObject)sfA,&comm);CHKERRQ(ierr); 1759 ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr); 1760 ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr); 1761 ierr = PetscSFGetLeafRange(sfA,&minleaf,&maxleaf);CHKERRQ(ierr); 1762 if (numRootsB != numLeavesA) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The second SF's number of roots must be equal to the first SF's number of leaves"); 1763 if (maxleaf+1 != numLeavesA || minleaf) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The first SF can not have sparse local space"); 1764 /* The above check is fast, but not sufficient, since we cannot guarantee that the SF has unique leaves. So in debug 1765 mode, check properly. */ 1766 ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr); 1767 if (localPointsA) { 1768 /* Local space is dense permutation of identity. 
Need to rewire order of the remote points */ 1769 ierr = PetscMalloc1(numLeavesA,&reorderedRemotePointsA);CHKERRQ(ierr); 1770 for (i=0; i<numLeavesA; i++) reorderedRemotePointsA[localPointsA[i]-minleaf] = remotePointsA[i]; 1771 remotePointsA = reorderedRemotePointsA; 1772 } 1773 ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr); 1774 ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr); 1775 ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr); 1776 ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr); 1777 ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr); 1778 1779 /* sfB's leaves must be unique, otherwise BcastAndOp(B o A) != BcastAndOp(B) o BcastAndOp(A) */ 1780 ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr); 1781 if (minleaf == 0 && maxleaf + 1 == numLeavesB) { /* Local space of sfB is an identity or permutation */ 1782 localPointsBA = NULL; 1783 remotePointsBA = leafdataB; 1784 } else { 1785 localPointsBA = localPointsB; 1786 ierr = PetscMalloc1(numLeavesB,&remotePointsBA);CHKERRQ(ierr); 1787 for (i=0; i<numLeavesB; i++) remotePointsBA[i] = leafdataB[localPointsB[i]-minleaf]; 1788 ierr = PetscFree(leafdataB);CHKERRQ(ierr); 1789 } 1790 ierr = PetscSFCreate(comm, sfBA);CHKERRQ(ierr); 1791 ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesB,localPointsBA,PETSC_COPY_VALUES,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr); 1792 PetscFunctionReturn(0); 1793 } 1794 1795 /*@ 1796 PetscSFComposeInverse - Compose a new PetscSF by putting inverse of the second SF under the first one 1797 1798 Input Parameters: 1799 + sfA - The first PetscSF, whose local space may be a permutation, but can not be sparse. 1800 - sfB - The second PetscSF, whose number of leaves must be equal to number of leaves of sfA on each processor. All roots must have degree 1. 1801 1802 Output Parameters: 1803 . sfBA - The composite SF. 
Doing a Bcast on the new SF is equvalent to doing Bcast on sfA, then Bcast on inverse of sfB 1804 1805 Level: developer 1806 1807 .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph() 1808 @*/ 1809 PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA) 1810 { 1811 PetscErrorCode ierr; 1812 MPI_Comm comm; 1813 const PetscSFNode *remotePointsA,*remotePointsB; 1814 PetscSFNode *remotePointsBA; 1815 const PetscInt *localPointsA,*localPointsB; 1816 PetscInt numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf; 1817 1818 PetscFunctionBegin; 1819 PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1); 1820 PetscSFCheckGraphSet(sfA,1); 1821 PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2); 1822 PetscSFCheckGraphSet(sfB,2); 1823 PetscValidPointer(sfBA,3); 1824 ierr = PetscObjectGetComm((PetscObject) sfA, &comm);CHKERRQ(ierr); 1825 ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr); 1826 ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr); 1827 ierr = PetscSFGetLeafRange(sfA,&minleaf,&maxleaf);CHKERRQ(ierr); 1828 if (maxleaf+1-minleaf != numLeavesA) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The first SF can not have sparse local space"); 1829 if (numLeavesA != numLeavesB) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The second SF's number of leaves must be equal to the first SF's number of leaves"); 1830 ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr); 1831 ierr = PetscSFReduceBegin(sfB,MPIU_2INT,remotePointsA,remotePointsBA,MPIU_REPLACE);CHKERRQ(ierr); 1832 ierr = PetscSFReduceEnd(sfB,MPIU_2INT,remotePointsA,remotePointsBA,MPIU_REPLACE);CHKERRQ(ierr); 1833 ierr = PetscSFCreate(comm,sfBA);CHKERRQ(ierr); 1834 ierr = PetscSFSetGraph(*sfBA,numRootsA,numRootsB,NULL,PETSC_COPY_VALUES,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr); 1835 PetscFunctionReturn(0); 1836 } 1837 1838 /* 1839 PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has 
intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
. out - The local PetscSF
 */
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm          comm;
  PetscMPIInt       myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt          i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode       *liremote;
  PetscSF           lsf;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->CreateLocalSF) {
    /* The implementation provides a specialized routine; defer to it */
    ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
  } else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr);

    /* First pass: count edges whose root lives on this rank */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);

    /* Second pass: copy the local edges, renumbering the rank to 0 for PETSC_COMM_SELF */
    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j] = ilocal ?
ilocal[i] : i; /* ilocal=NULL for contiguous storage */ 1876 liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */ 1877 liremote[j].index = iremote[i].index; 1878 j++; 1879 } 1880 } 1881 ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr); 1882 ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1883 ierr = PetscSFSetUp(lsf);CHKERRQ(ierr); 1884 *out = lsf; 1885 } 1886 PetscFunctionReturn(0); 1887 } 1888 1889 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */ 1890 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata) 1891 { 1892 PetscErrorCode ierr; 1893 1894 PetscFunctionBegin; 1895 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1896 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1897 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1898 if (sf->ops->BcastToZero) { 1899 ierr = (*sf->ops->BcastToZero)(sf,unit,rootdata,leafdata);CHKERRQ(ierr); 1900 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type"); 1901 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1902 PetscFunctionReturn(0); 1903 } 1904 1905