#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/hashseti.h>
#include <petscctable.h>

/* In debug builds, error out if an SF's graph has not been set before use; compiles to a no-op otherwise */
#if defined(PETSC_USE_DEBUG)
#  define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                              \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif

/* String names for PetscSFDuplicateOption, used by options processing/printing */
const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0};

/*@
   PetscSFCreate - create a star forest communication context

   Collective

   Input Arguments:
.  comm - communicator on which the star forest will operate

   Output Arguments:
.  sf - new star forest context

   Options Database Keys:
+  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
.  -sf_type window    -Use MPI-3 one-sided window for communication
-  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication

   Level: intermediate

   Notes:
   When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
   MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
   SFs are optimized and they have better performance than general SFs.

.seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
{
  PetscErrorCode ierr;
  PetscSF        b;

  PetscFunctionBegin;
  PetscValidPointer(sf,2);
  ierr = PetscSFInitializePackage();CHKERRQ(ierr);

  ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);

  /* Sentinel values marking "graph not set yet": counts are -1 and the leaf range is empty (min > max) */
  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_MAX_INT;
  b->maxleaf   = PETSC_MIN_INT;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;

  *sf = b;
  PetscFunctionReturn(0);
}

/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Arguments:
.  sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Give the implementation a chance to release its type-specific state first */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  /* Restore the "graph not set" sentinels used by PetscSFCreate() */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  /* mine/remote above may alias mine_alloc/remote_alloc; only the *_alloc pointers are owned and freed */
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* Device-side copy of rmine, if it was ever mirrored to the GPU */
  if (sf->rmine_d) {cudaError_t err = cudaFree(sf->rmine_d);CHKERRCUDA(err);sf->rmine_d=NULL;}
#endif
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFSetType - Set the PetscSF communication implementation

   Collective on PetscSF

   Input Parameters:
+  sf - the PetscSF context
-  type - a known method

   Options Database Key:
.  -sf_type <type> - Sets the method; use -help for a list
   of available methods (for instance, window, basic, neighbor)

   Notes:
   See "include/petscsf.h" for available methods (for instance)
+    PETSCSFWINDOW - MPI-2/3 one-sided
-    PETSCSFBASIC - basic implementation using MPI-1 two-sided

  Level: intermediate

.seealso: PetscSFType, PetscSFCreate()
@*/
PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
{
  PetscErrorCode ierr,(*r)(PetscSF);
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidCharPointer(type,2);

  /* No-op if the SF is already of the requested type */
  ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
  if (match) PetscFunctionReturn(0);

  ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
  if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
  /* Destroy the previous PetscSF implementation context */
  if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
  ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
  /* r is the registered constructor for the new type; it fills in sf->ops */
  ierr = (*r)(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
  PetscSFGetType - Get the PetscSF communication implementation

  Not Collective

  Input Parameter:
. sf  - the PetscSF context

  Output Parameter:
.
 type - the PetscSF type name

  Level: intermediate

.seealso: PetscSFSetType(), PetscSFCreate()
@*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
  PetscValidPointer(type,2);
  /* Returns a borrowed reference to the type name; NULL if PetscSFSetType() has not been called */
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}

/*@
   PetscSFDestroy - destroy star forest

   Collective

   Input Arguments:
.  sf - address of star forest

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFReset()
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Reference counted: only the last reference actually tears the object down */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Debug-only sanity check of the graph: every remote rank must be a valid communicator rank and
   all root/leaf indices must be nonnegative. In optimized builds this is a no-op. */
static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
{
#if defined(PETSC_USE_DEBUG)
  PetscInt          i, nleaves;
  PetscMPIInt       size;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  if (!sf->graphset) PetscFunctionReturn(0);
  ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  for (i = 0; i < nleaves; i++) {
    const PetscInt rank   = iremote[i].rank;
    const PetscInt remote = iremote[i].index;
    /* NULL ilocal means leaves are stored contiguously, so the leaf location is just i */
    const PetscInt leaf   = ilocal ? ilocal[i] : i;
    if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
    if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
    if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
  }
  PetscFunctionReturn(0);
#else
  PetscFunctionBegin;
  PetscFunctionReturn(0);
#endif
}

/*@
   PetscSFSetUp - set up communication structures

   Collective

   Input Arguments:
.  sf - star forest communication object

   Level: beginner

.seealso: PetscSFSetFromOptions(), PetscSFSetType()
@*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (sf->setupcalled) PetscFunctionReturn(0);
  ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr);
  /* Default to the basic (two-sided MPI) implementation when the user never chose a type */
  if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);}
  ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
  ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetFromOptions - set PetscSF options using the options database

   Logically Collective

   Input Arguments:
.  sf - star forest

   Options Database Keys:
+  -sf_type - implementation type, see PetscSFSetType()
.
 -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
-  -sf_use_pinned_buffer - use pinned (nonpagable) memory for send/recv buffers on host when communicating GPU data but GPU-aware MPI is not used.
                           Only available for SF types of basic and neighbor.

   Level: intermediate
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
  /* Keep the current type as the default shown in -help; fall back to basic if none is set */
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
  /* Type-specific options (e.g. -sf_use_pinned_buffer) are handled by the implementation's hook;
     PetscOptionsObject is brought into scope by the PetscObjectOptionsBegin() macro above */
  if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

   Logically Collective

   Input Arguments:
+  sf - star forest
-  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

   Level: advanced

.seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  /* The multi-SF is built lazily on first gather/scatter; once it exists the ordering is baked in */
  if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Arguments:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
during setup in debug mode)
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
during setup in debug mode)
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
    In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode

   Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
   encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
   needed

   Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
   indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).

.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
  if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    ierr = PetscSFReset(sf);CHKERRQ(ierr);
  }

  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  if (nleaves && ilocal) {
    /* Compute the leaf range, and detect the identity permutation so it can be stored implicitly */
    PetscInt i;
    PetscInt minleaf = PETSC_MAX_INT;
    PetscInt maxleaf = PETSC_MIN_INT;
    int      contiguous = 1;
    for (i=0; i<nleaves; i++) {
      minleaf = PetscMin(minleaf,ilocal[i]);
      maxleaf = PetscMax(maxleaf,ilocal[i]);
      contiguous &= (ilocal[i] == i);
    }
    sf->minleaf = minleaf;
    sf->maxleaf = maxleaf;
    if (contiguous) {
      /* ilocal is the identity [0,nleaves); drop it and use implicit contiguous storage.
         With PETSC_OWN_POINTER we now own the array and it is no longer needed, so free it. */
      if (localmode == PETSC_OWN_POINTER) {
        ierr = PetscFree(ilocal);CHKERRQ(ierr);
      }
      ilocal = NULL;
    }
  } else {
    /* Contiguous leaves: range is [0,nleaves-1]; for nleaves==0 this gives the empty range (0,-1) */
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
  }

  if (ilocal) {
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
      sf->mine = sf->mine_alloc;
      break;
    case PETSC_OWN_POINTER:
      /* Take ownership: mine_alloc will be freed by PetscSFReset() */
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      /* Borrow the caller's array; mine_alloc stays NULL so nothing is freed later */
      sf->mine_alloc = NULL;
      sf->mine       = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }

  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    sf->remote_alloc = NULL;
    sf->remote       = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
  PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

  Collective

  Input Parameters:
+ sf      - The PetscSF
. map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
- pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

  Notes:
  It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
  n and N are local and global sizes of x respectively.

  With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
  sequential vectors y on all ranks.

  With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
  sequential vector y on rank 0.

  In above cases, entries of x are roots and entries of y are leaves.

  With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
  creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
  of rank j.
 Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
  not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data
  items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

  In this case, roots and leaves are symmetric.

  Level: intermediate
@*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* Build a synthetic layout with `size` roots per rank; the user-provided map is ignored for ALLTOALL */
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr   = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr   = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
    /* After MAX-reduction, res[0]==max(n) and -res[1]==min(n); equality means uniform local size,
       allowing the non-"v" (fixed-count) MPI collectives to be used */
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots  = n;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves (it receives everything); other ranks have an empty leaf range */
    sf->nleaves = rank ? 0 : N;
    sf->nroots  = n;
    sf->nranks  = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves = size;
    sf->nroots  = size;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
  PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

  Collective

  Input Arguments:

.  sf - star forest to invert

  Output Arguments:
.  isf - inverse of sf
  Level: advanced

  Notes:
  All roots must have degree 1.

  The local space may be a permutation, but cannot be sparse.

.seealso: PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  /* Each leaf slot advertises its own (rank,index) so a reduce tells every root who points at it */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* (-1,-1) marks roots that no leaf references; they are filtered out below */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL; /* dense: contiguous leaf storage in the inverse */
  else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  /* Duplicate configuration only; the inverse graph is set explicitly.
     newilocal ownership transfers to *isf via PETSC_OWN_POINTER. */
  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Arguments:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Arguments:
.  newsf - new communication object

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      /* General graphs are deep-copied; patterned graphs are recreated from the pattern below */
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      ierr =
PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
  /* Let the implementation duplicate its per-type state */
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGraph - Get the graph specifying a parallel star forest

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers
-  iremote - remote locations of root vertices for each leaf on the current process

   Notes:
   We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet

   When called from Fortran, the returned iremote array is a copy and must deallocated after use. Consequently, if you
   want to update the graph, you must call PetscSFSetGraph after modifying the iremote array.

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Patterned implementations may materialize the graph on demand via this hook */
  if (sf->ops->GetGraph) {
    ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
  } else {
    /* Every output is optional; pass NULL for those not needed */
    if (nroots)  *nroots  = sf->nroots;
    if (nleaves) *nleaves = sf->nleaves;
    if (ilocal)  *ilocal  = sf->mine;
    if (iremote) *iremote = sf->remote;
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetLeafRange - Get the active leaf ranges

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  minleaf - minimum active leaf on this process.
 Return 0 if there are no leaves.
-  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.

   Level: developer

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  /* Both outputs are optional */
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFViewFromOptions - View from Options

   Collective on PetscSF

   Input Parameters:
+  A - the star forest
.  obj - Optional object
-  name - command line option

   Level: intermediate
.seealso:  PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
  ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFView - view a star forest

   Collective

   Input Arguments:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Default to stdout on the SF's communicator when no viewer is supplied */
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Viewing rank data requires the SF to be set up; only possible once a graph exists */
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      /* Synchronized printing keeps per-rank output ordered by rank */
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        /* NULL sf->mine means contiguous leaves, so leaf i lives at location i */
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        /* Sort a copy of the rank list (with a permutation) so the detail listing is in rank order */
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
.  roffset - offset in rmine/rremote for each rank (length nranks+1)
.
 rmine - concatenated array holding local indices referencing each remote rank
-  rremote - concatenated array holding remote indices referenced for each remote rank

   Level: developer

.seealso: PetscSFGetLeafRanks()
@*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Rank data is built during setup (see PetscSFSetUpRanks), so it must have run already */
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetRootRanks) {
    ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
  } else {
    /* The generic implementation */
    if (nranks)  *nranks  = sf->nranks;
    if (ranks)   *ranks   = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine)   *rmine   = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  niranks - number of leaf ranks referencing roots on this process
.  iranks - array of ranks
.
 ioffset - offset in irootloc for each rank (length niranks+1)
-  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetLeafRanks) {
    ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
  } else {
    /* Unlike root ranks, there is no generic fallback: leaf-rank data only exists for types that build it */
    PetscSFType type;
    ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
  }
  PetscFunctionReturn(0);
}

/* Linear membership test: PETSC_TRUE iff needle occurs in list[0..n).
   Used below where the list (distinguished ranks) is expected to be small. */
static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
  PetscInt i;
  for (i=0; i<n; i++) {
    if (needle == list[i]) return PETSC_TRUE;
  }
  return PETSC_FALSE;
}

/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

   Collective

   Input Arguments:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  /* Count, per remote rank, how many of my leaves reference it (hash table keyed by rank) */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank: PetscTable cannot store key 0, so shift ranks by one */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  /* rmine/rremote are leaf-sized: they hold the local/remote indices grouped by owning rank */
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--; /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group   group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate the distinguished group's members into ranks of this SF's communicator */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished.
     Classic two-pointer partition: ndranks advances from the front, i retreats from the back. */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i; ) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for ( ; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition separately so distinguished ranks stay in front */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* Prefix-sum the per-rank counts into offsets, then reuse rcount[] as a fill cursor below */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;
  }
  /* Bucket every leaf by owning rank (counting-sort style), caching the last looked-up rank */
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Argument:
.  sf - star forest

   Output Arguments:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks: build an auxiliary SF with one root and one leaf
       per communicating rank, whose root degree equals the number of ranks referencing me */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);

    /* Enumerate the incoming ranks */
    ierr =
PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
    /* Each rank gathers, at the single root, the rank ids of everyone pointing at it */
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  /* The outgoing group is simply the ranks I reference, already known from PetscSFSetUpRanks() */
  if (sf->outgroup == MPI_GROUP_NULL) {
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Argument:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Arguments:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.

.seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    *multi = sf->multi;
    PetscFunctionReturn(0);
  }
  if (!sf->multi) { /* Lazily build the multi-SF the first time it is requested, then cache it */
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    /* inoffset[] is the prefix sum of root degrees: each original root i owns multi-roots
       inoffset[i]..inoffset[i+1]-1 */
    ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Fetch-and-add of 1 hands each leaf a distinct slot within its root's multi-root range */
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
#if 0
#if defined(PETSC_USE_DEBUG) /* Check that the expected number of increments occurred */
    for (i=0; i<sf->nroots; i++) {
      if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
    }
#endif
#endif
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) { /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      /* Deposit each leaf's owning rank into its multi-root slot so roots can sort by source rank */
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell every leaf its slot in the rank-sorted ordering, then rebuild the graph with it */
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
    }
    ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices

   Collective

   Input Arguments:
+  sf - original star forest
.  nselected  - number of selected roots on this process
-  selected   - indices of the selected roots on this process

   Output Arguments:
.  newsf - new star forest

   Level: advanced

   Note:
   To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
   be done by calling PetscSFGetGraph().

.seealso: PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  PetscInt          i,n,*roots,*rootdata,*leafdata,nroots,nleaves,connected_leaves,*new_ilocal;
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  PetscSF           tmpsf;
  MPI_Comm          comm;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (nselected) PetscValidPointer(selected,3);
  PetscValidPointer(newsf,4);

  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
  /* Uniq selected[] and put results in roots[] */
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscMalloc1(nselected,&roots);CHKERRQ(ierr);
  ierr = PetscArraycpy(roots,selected,nselected);CHKERRQ(ierr);
  ierr =
PetscSortedRemoveDupsInt(&nselected,roots);CHKERRQ(ierr); 1194 if (nselected && (roots[0] < 0 || roots[nselected-1] >= sf->nroots)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %D/%D are not in [0,%D)",roots[0],roots[nselected-1],sf->nroots); 1195 1196 if (sf->ops->CreateEmbeddedSF) { 1197 ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,roots,newsf);CHKERRQ(ierr); 1198 } else { 1199 /* A generic version of creating embedded sf. Note that we called PetscSFSetGraph() twice, which is certainly expensive */ 1200 /* Find out which leaves (not leaf data items) are still connected to roots in the embedded sf */ 1201 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,NULL,&iremote);CHKERRQ(ierr); 1202 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&tmpsf);CHKERRQ(ierr); 1203 ierr = PetscSFSetGraph(tmpsf,nroots,nleaves,NULL/*contiguous*/,PETSC_USE_POINTER,iremote,PETSC_USE_POINTER);CHKERRQ(ierr); 1204 ierr = PetscCalloc2(nroots,&rootdata,nleaves,&leafdata);CHKERRQ(ierr); 1205 for (i=0; i<nselected; ++i) rootdata[roots[i]] = 1; 1206 ierr = PetscSFBcastBegin(tmpsf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr); 1207 ierr = PetscSFBcastEnd(tmpsf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr); 1208 ierr = PetscSFDestroy(&tmpsf);CHKERRQ(ierr); 1209 1210 /* Build newsf with leaves that are still connected */ 1211 connected_leaves = 0; 1212 for (i=0; i<nleaves; ++i) connected_leaves += leafdata[i]; 1213 ierr = PetscMalloc1(connected_leaves,&new_ilocal);CHKERRQ(ierr); 1214 ierr = PetscMalloc1(connected_leaves,&new_iremote);CHKERRQ(ierr); 1215 for (i=0, n=0; i<nleaves; ++i) { 1216 if (leafdata[i]) { 1217 new_ilocal[n] = sf->mine ? 
sf->mine[i] : i; 1218 new_iremote[n].rank = sf->remote[i].rank; 1219 new_iremote[n].index = sf->remote[i].index; 1220 ++n; 1221 } 1222 } 1223 1224 if (n != connected_leaves) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"There is a size mismatch in the SF embedding, %d != %d",n,connected_leaves); 1225 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr); 1226 ierr = PetscSFSetGraph(*newsf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1227 ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr); 1228 } 1229 ierr = PetscFree(roots);CHKERRQ(ierr); 1230 ierr = PetscSFSetUp(*newsf);CHKERRQ(ierr); 1231 ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1232 PetscFunctionReturn(0); 1233 } 1234 1235 /*@C 1236 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1237 1238 Collective 1239 1240 Input Arguments: 1241 + sf - original star forest 1242 . nselected - number of selected leaves on this process 1243 - selected - indices of the selected leaves on this process 1244 1245 Output Arguments: 1246 . 
newsf - new star forest 1247 1248 Level: advanced 1249 1250 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph() 1251 @*/ 1252 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1253 { 1254 const PetscSFNode *iremote; 1255 PetscSFNode *new_iremote; 1256 const PetscInt *ilocal; 1257 PetscInt i,nroots,*leaves,*new_ilocal; 1258 MPI_Comm comm; 1259 PetscErrorCode ierr; 1260 1261 PetscFunctionBegin; 1262 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1263 PetscSFCheckGraphSet(sf,1); 1264 if (nselected) PetscValidPointer(selected,3); 1265 PetscValidPointer(newsf,4); 1266 1267 /* Uniq selected[] and put results in leaves[] */ 1268 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1269 ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr); 1270 ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr); 1271 ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr); 1272 if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves); 1273 1274 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1275 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) { 1276 ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr); 1277 } else { 1278 ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr); 1279 ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr); 1280 ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr); 1281 for (i=0; i<nselected; ++i) { 1282 const PetscInt l = leaves[i]; 1283 new_ilocal[i] = ilocal ? 
ilocal[l] : l; 1284 new_iremote[i].rank = iremote[l].rank; 1285 new_iremote[i].index = iremote[l].index; 1286 } 1287 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr); 1288 ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1289 } 1290 ierr = PetscFree(leaves);CHKERRQ(ierr); 1291 PetscFunctionReturn(0); 1292 } 1293 1294 /*@C 1295 PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd() 1296 1297 Collective on PetscSF 1298 1299 Input Arguments: 1300 + sf - star forest on which to communicate 1301 . unit - data type associated with each node 1302 . rootdata - buffer to broadcast 1303 - op - operation to use for reduction 1304 1305 Output Arguments: 1306 . leafdata - buffer to be reduced with values from each leaf's respective root 1307 1308 Level: intermediate 1309 1310 .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd() 1311 @*/ 1312 PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1313 { 1314 PetscErrorCode ierr; 1315 PetscMemType rootmtype,leafmtype; 1316 1317 PetscFunctionBegin; 1318 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1319 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1320 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1321 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1322 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1323 #if defined(PETSC_HAVE_CUDA) 1324 /* Shall we assume rootdata, leafdata are ready to use (instead of being computed by some asynchronous kernels)? 1325 To be similar to MPI, I'd like to have this assumption, since MPI does not have a concept of stream. 1326 But currently this assumption is not enforecd in Petsc. To be safe, I do synchronization here. 
 Otherwise, if
     we do not sync now and call the Pack kernel directly on the default NULL stream (assume petsc objects are also
     computed on it), we have to sync the NULL stream before calling MPI routines. So, it looks a cudaDeviceSynchronize
     is inevitable. We do it now and put pack/unpack kernels to non-NULL streams.
   */
  /* Only pay the device-wide sync when at least one buffer lives on the GPU */
  if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);}
#endif
  ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Arguments:
.
leafdata - buffer to be reduced with values from each leaf's respective root 1351 1352 Level: intermediate 1353 1354 .seealso: PetscSFSetGraph(), PetscSFReduceEnd() 1355 @*/ 1356 PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1357 { 1358 PetscErrorCode ierr; 1359 PetscMemType rootmtype,leafmtype; 1360 1361 PetscFunctionBegin; 1362 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1363 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1364 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1365 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1366 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1367 ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 1368 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1369 PetscFunctionReturn(0); 1370 } 1371 1372 /*@C 1373 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 1374 1375 Collective 1376 1377 Input Arguments: 1378 + sf - star forest 1379 . unit - data type 1380 . leafdata - values to reduce 1381 - op - reduction operation 1382 1383 Output Arguments: 1384 . 
rootdata - result of reduction of values from all leaves of each root 1385 1386 Level: intermediate 1387 1388 .seealso: PetscSFBcastBegin() 1389 @*/ 1390 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1391 { 1392 PetscErrorCode ierr; 1393 PetscMemType rootmtype,leafmtype; 1394 1395 PetscFunctionBegin; 1396 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1397 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1398 ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1399 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1400 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1401 #if defined(PETSC_HAVE_CUDA) 1402 if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);} 1403 #endif 1404 ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr); 1405 ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1406 PetscFunctionReturn(0); 1407 } 1408 1409 /*@C 1410 PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin() 1411 1412 Collective 1413 1414 Input Arguments: 1415 + sf - star forest 1416 . unit - data type 1417 . leafdata - values to reduce 1418 - op - reduction operation 1419 1420 Output Arguments: 1421 . 
rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscMemType   rmtype,lmtype;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
  /* Determine whether each buffer resides in host or device memory before dispatching */
  ierr = PetscGetMemType(rootdata,&rmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&lmtype);CHKERRQ(ierr);
  /* Delegate completion to the implementation started by PetscSFReduceBegin() */
  ierr = (*sf->ops->ReduceEnd)(sf,unit,lmtype,leafdata,rmtype,rootdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Arguments:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
   integers.
1465 1466 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph() 1467 @*/ 1468 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1469 { 1470 PetscErrorCode ierr; 1471 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1472 1473 PetscFunctionBegin; 1474 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1475 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1476 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1477 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1478 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1479 ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr); 1480 if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1481 #if defined(PETSC_HAVE_CUDA) 1482 if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);} 1483 #endif 1484 ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr); 1485 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1486 PetscFunctionReturn(0); 1487 } 1488 1489 /*@C 1490 PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value 1491 1492 Collective 1493 1494 Input Arguments: 1495 + sf - star forest 1496 . unit - data type 1497 . 
leafdata - leaf values to use in reduction 1498 - op - operation to use for reduction 1499 1500 Output Arguments: 1501 + rootdata - root values to be updated, input state is seen by first process to perform an update 1502 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1503 1504 Level: advanced 1505 1506 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph() 1507 @*/ 1508 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1509 { 1510 PetscErrorCode ierr; 1511 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1512 1513 PetscFunctionBegin; 1514 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1515 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1516 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1517 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1518 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1519 ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr); 1520 if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1521 ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr); 1522 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1523 PetscFunctionReturn(0); 1524 } 1525 1526 /*@C 1527 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd() 1528 1529 Collective 1530 1531 Input Arguments: 1532 . sf - star forest 1533 1534 Output Arguments: 1535 . degree - degree of each root vertex 1536 1537 Level: advanced 1538 1539 Notes: 1540 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 

.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) { /* Compute once and cache on the SF; subsequent calls are free */
    PetscInt i, nroots = sf->nroots, maxlocal = sf->maxleaf+1;  /* TODO: We should use PetscSFGetLeafRange() */
    if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* Each leaf contributes 1 to its root; the MPI_SUM at each root is exactly its degree */
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  *degree = NULL; /* not valid until PetscSFComputeDegreeEnd() completes the reduction */
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso:
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) { /* First completion: finish the reduction and cache the result */
    /* degreetmp is allocated by PetscSFComputeDegreeBegin(); its absence means Begin was never called */
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    sf->degreeknown = PETSC_TRUE;
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}


/*@C
   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
   Each multi-root is assigned index of the corresponding original root.

   Collective

   Input Arguments:
+  sf - star forest
-  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

   Output Arguments:
+  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
-  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

   Level: developer

   Notes:
   The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.

.seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
@*/
PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
{
  PetscSF        msf;
  PetscInt       i, j, k, nroots, nmroots;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
  if (nroots) PetscValidIntPointer(degree,2);
  if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
  PetscValidPointer(multiRootsOrigNumbering,4);
  ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr);
  /* Original root i owns degree[i] consecutive multi-roots; expand that run-length
     encoding so entry k of the output names the original root of multi-root k */
  for (i=0,j=0,k=0; i<nroots; i++) {
    if (!degree[i]) continue; /* roots with no leaves contribute no multi-roots */
    for (j=0; j<degree[i]; j++,k++) {
      (*multiRootsOrigNumbering)[k] = i;
    }
  }
#if defined(PETSC_USE_DEBUG)
  if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
#endif
  if (nMultiRoots) *nMultiRoots = nmroots;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Argument:
.
multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1663 1664 Level: intermediate 1665 1666 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1667 @*/ 1668 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1669 { 1670 PetscErrorCode ierr; 1671 PetscSF multi; 1672 1673 PetscFunctionBegin; 1674 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1675 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1676 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1677 ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 1678 PetscFunctionReturn(0); 1679 } 1680 1681 /*@C 1682 PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin() 1683 1684 Collective 1685 1686 Input Arguments: 1687 + sf - star forest 1688 . unit - data type 1689 - leafdata - leaf data to gather to roots 1690 1691 Output Argument: 1692 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1693 1694 Level: intermediate 1695 1696 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1697 @*/ 1698 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1699 { 1700 PetscErrorCode ierr; 1701 PetscSF multi; 1702 1703 PetscFunctionBegin; 1704 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1705 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1706 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1707 ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 1708 PetscFunctionReturn(0); 1709 } 1710 1711 /*@C 1712 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd() 1713 1714 Collective 1715 1716 Input Arguments: 1717 + sf - star forest 1718 . unit - data type 1719 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1720 1721 Output Argument: 1722 . 
leafdata - leaf data to be update with personal data from each respective root 1723 1724 Level: intermediate 1725 1726 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1727 @*/ 1728 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1729 { 1730 PetscErrorCode ierr; 1731 PetscSF multi; 1732 1733 PetscFunctionBegin; 1734 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1735 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1736 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1737 ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr); 1738 PetscFunctionReturn(0); 1739 } 1740 1741 /*@C 1742 PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin() 1743 1744 Collective 1745 1746 Input Arguments: 1747 + sf - star forest 1748 . unit - data type 1749 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1750 1751 Output Argument: 1752 . leafdata - leaf data to be update with personal data from each respective root 1753 1754 Level: intermediate 1755 1756 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1757 @*/ 1758 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1759 { 1760 PetscErrorCode ierr; 1761 PetscSF multi; 1762 1763 PetscFunctionBegin; 1764 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1765 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1766 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1767 ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr); 1768 PetscFunctionReturn(0); 1769 } 1770 1771 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 1772 { 1773 #if defined(PETSC_USE_DEBUG) 1774 PetscInt i, n, nleaves; 1775 const PetscInt *ilocal = NULL; 1776 PetscHSetI seen; 1777 PetscErrorCode ierr; 1778 1779 PetscFunctionBegin; 1780 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 1781 ierr = 
PetscHSetICreate(&seen);CHKERRQ(ierr); 1782 for (i = 0; i < nleaves; i++) { 1783 const PetscInt leaf = ilocal ? ilocal[i] : i; 1784 ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr); 1785 } 1786 ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr); 1787 if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique"); 1788 ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr); 1789 PetscFunctionReturn(0); 1790 #else 1791 PetscFunctionBegin; 1792 PetscFunctionReturn(0); 1793 #endif 1794 } 1795 1796 /*@ 1797 PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view 1798 1799 Input Parameters: 1800 + sfA - The first PetscSF 1801 - sfB - The second PetscSF 1802 1803 Output Parameters: 1804 . sfBA - The composite SF 1805 1806 Level: developer 1807 1808 Notes: 1809 Currently, the two SFs must be defined on congruent communicators and they must be true star 1810 forests, i.e. the same leaf is not connected with different roots. 1811 1812 sfA's leaf space and sfB's root space might be partially overlapped. The composition builds 1813 a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected 1814 nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a 1815 Bcast on sfA, then a Bcast on sfB, on connected nodes. 

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  /* True star forests only: a repeated leaf would make the composition ambiguous */
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  if (localPointsA) {
    /* Scatter sfA's remote roots into sfB's root index space so they can be broadcast through sfB.
       rank = -1 marks sfB roots with no incoming sfA edge. */
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* sfA leaf outside sfB's root space: no path */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  /* leafdataB[l-minleaf] receives, for each sfB leaf l, the sfA root reached through the composition */
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  /* Offsetting the buffer by -minleaf lets sfB index it directly with its leaf numbers */
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* denseB: leaves are contiguous AND every leaf is connected, so leafdataB can be used as-is */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    /* Ownership of leafdataB transfers to the new SF via PETSC_OWN_POINTER below */
    localPointsBA  = NULL;
    remotePointsBA = leafdataB;
  } else {
    /* Compact: keep only connected leaves */
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue; /* unconnected leaf: drop */
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF.

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
  second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

  sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
  roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
  a Reduce on sfB, on connected roots.

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);
  /* TODO: Check roots of sfB have degree of 1 */

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  /* reorderedRemotePointsA[l-minleaf] holds, for each sfB leaf l, the sfA root connected to that leaf;
     rank = -1 marks leaves with no corresponding sfA edge */
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; /* sfA leaf outside sfB's leaf range: no path */
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* localPointsA == NULL: sfA leaves are the contiguous range 0..numLeavesA-1 */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  /* Initialize with rank = -1 so sfB roots that receive no contribution are recognizable below */
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Once we implement the TODO above (check all roots of sfB have degree of 1), we can replace the MPI_MAXLOC
     with MPIU_REPLACE. In that case, MPI_MAXLOC and MPIU_REPLACE have the same effect.
   */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,MPI_MAXLOC);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,MPI_MAXLOC);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* Compact in place: keep only sfB roots that received a connected sfA root */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
.
out - The local PetscSF 1982 */ 1983 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out) 1984 { 1985 MPI_Comm comm; 1986 PetscMPIInt myrank; 1987 const PetscInt *ilocal; 1988 const PetscSFNode *iremote; 1989 PetscInt i,j,nroots,nleaves,lnleaves,*lilocal; 1990 PetscSFNode *liremote; 1991 PetscSF lsf; 1992 PetscErrorCode ierr; 1993 1994 PetscFunctionBegin; 1995 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1996 if (sf->ops->CreateLocalSF) { 1997 ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr); 1998 } else { 1999 /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */ 2000 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 2001 ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr); 2002 2003 /* Find out local edges and build a local SF */ 2004 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 2005 for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;} 2006 ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr); 2007 ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr); 2008 2009 for (i=j=0; i<nleaves; i++) { 2010 if (iremote[i].rank == (PetscInt)myrank) { 2011 lilocal[j] = ilocal? 
ilocal[i] : i; /* ilocal=NULL for contiguous storage */ 2012 liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */ 2013 liremote[j].index = iremote[i].index; 2014 j++; 2015 } 2016 } 2017 ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr); 2018 ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 2019 ierr = PetscSFSetUp(lsf);CHKERRQ(ierr); 2020 *out = lsf; 2021 } 2022 PetscFunctionReturn(0); 2023 } 2024 2025 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */ 2026 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata) 2027 { 2028 PetscErrorCode ierr; 2029 PetscMemType rootmtype,leafmtype; 2030 2031 PetscFunctionBegin; 2032 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 2033 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 2034 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 2035 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 2036 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 2037 if (sf->ops->BcastToZero) { 2038 ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr); 2039 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type"); 2040 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 2041 PetscFunctionReturn(0); 2042 } 2043 2044