#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/hashseti.h>
#include <petscctable.h>

/* Debug-only guard: raise an error if the SF's graph has not been set via
   PetscSFSetGraph()/PetscSFSetGraphWithPattern() before use. Expands to a
   no-op in optimized builds. */
#if defined(PETSC_USE_DEBUG)
# define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                             \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
# define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif

/* String names matching the PetscSFDuplicateOption enum, for options processing/printing */
const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0};

/*@
   PetscSFCreate - create a star forest communication context

   Collective

   Input Arguments:
.  comm - communicator on which the star forest will operate

   Output Arguments:
.  sf - new star forest context

   Options Database Keys:
+  -sf_type basic     - Use MPI persistent Isend/Irecv for communication (Default)
.  -sf_type window    - Use MPI-3 one-sided window for communication
-  -sf_type neighbor  - Use MPI-3 neighborhood collectives for communication

   Level: intermediate

   Notes:
   When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
   MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
   SFs are optimized and they have better performance than general SFs.
.seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
{
  PetscErrorCode ierr;
  PetscSF        b;

  PetscFunctionBegin;
  PetscValidPointer(sf,2);
  ierr = PetscSFInitializePackage();CHKERRQ(ierr);

  ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);

  /* Sentinel values marking "graph not set yet"; min/max leaf use extreme
     values so the first real graph always tightens the range */
  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_MAX_INT;
  b->maxleaf   = PETSC_MIN_INT;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;

  *sf = b;
  PetscFunctionReturn(0);
}

/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Arguments:
.  sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Type-specific teardown first, then release the generic graph storage */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* Device-side mirror of rmine, allocated lazily; free it here too */
  if (sf->rmine_d) {cudaError_t err = cudaFree(sf->rmine_d);CHKERRCUDA(err);sf->rmine_d=NULL;}
#endif
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFSetType - Set the PetscSF communication implementation

   Collective on PetscSF

   Input Parameters:
+  sf - the PetscSF context
-  type - a known method

   Options Database Key:
.  -sf_type <type> - Sets the method; use -help for a list
   of available methods (for instance, window, basic, neighbor)

   Notes:
   See "include/petscsf.h" for available methods (for instance)
+    PETSCSFWINDOW - MPI-2/3 one-sided
-    PETSCSFBASIC - basic implementation using MPI-1 two-sided

  Level: intermediate

.seealso: PetscSFType, PetscSFCreate()
@*/
PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
{
  PetscErrorCode ierr,(*r)(PetscSF);
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidCharPointer(type,2);

  /* No-op if the SF already has the requested type */
  ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
  if (match) PetscFunctionReturn(0);

  ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
  if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
  /* Destroy the previous PetscSF implementation context */
  if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
  ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
  /* The constructor found above fills in sf->ops for the new type */
  ierr = (*r)(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetType - Get the PetscSF communication implementation

   Not Collective

   Input Parameter:
.  sf - the PetscSF context

   Output Parameter:
.
  type - the PetscSF type name

   Level: intermediate

.seealso: PetscSFSetType(), PetscSFCreate()
@*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
  PetscValidPointer(type,2);
  /* Returns the internal name string; caller must not free it */
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}

/*@
   PetscSFDestroy - destroy star forest

   Collective

   Input Arguments:
.  sf - address of star forest

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFReset()
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Only the last reference actually destroys the object */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Debug-only validation of the graph: every leaf must reference an existing MPI
   rank, a nonnegative remote root index, and a nonnegative local location.
   Compiled to a no-op when PETSC_USE_DEBUG is not defined. */
static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
{
#if defined(PETSC_USE_DEBUG)
  PetscInt          i, nleaves;
  PetscMPIInt       size;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  if (!sf->graphset) PetscFunctionReturn(0);
  ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  for (i = 0; i < nleaves; i++) {
    const PetscInt rank   = iremote[i].rank;
    const PetscInt remote = iremote[i].index;
    const PetscInt leaf   = ilocal ? ilocal[i] : i; /* NULL ilocal means contiguous leaves */
    if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
    if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
    if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
  }
  PetscFunctionReturn(0);
#else
  PetscFunctionBegin;
  PetscFunctionReturn(0);
#endif
}

/*@
   PetscSFSetUp - set up communication structures

   Collective

   Input Arguments:
.  sf - star forest communication object

   Level: beginner

.seealso: PetscSFSetFromOptions(), PetscSFSetType()
@*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (sf->setupcalled) PetscFunctionReturn(0);
  ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr);
  /* Default to the basic (two-sided MPI) implementation if no type chosen yet */
  if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);}
  ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
  ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetFromOptions - set PetscSF options using the options database

   Logically Collective

   Input Arguments:
.  sf - star forest

   Options Database Keys:
+  -sf_type - implementation type, see PetscSFSetType()
.
  -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
-  -sf_use_pinned_buffer - use pinned (nonpagable) memory for send/recv buffers on host when communicating GPU data but GPU-aware MPI is not used.
                           Only available for SF types of basic and neighbor.

   Level: intermediate

.seealso: PetscSFWindowSetSyncType()
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
  /* Keep the current type as the default if one is already set */
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
  /* PetscOptionsObject is declared by the PetscObjectOptionsBegin() macro above */
  if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

   Logically Collective

   Input Arguments:
+  sf - star forest
-  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

   Level: advanced

.seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  /* The multi-SF is built lazily on first gather/scatter; once it exists the ordering is baked in */
  if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Arguments:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
during setup in debug mode)
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
during setup in debug mode)
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
   In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode

   Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
   encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
   needed

   Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
   indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).
.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
  if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    ierr = PetscSFReset(sf);CHKERRQ(ierr);
  }

  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  if (nleaves && ilocal) {
    /* Compute the leaf range and detect the identity permutation, which is
       equivalent to contiguous storage (represented internally as ilocal == NULL) */
    PetscInt i;
    PetscInt minleaf = PETSC_MAX_INT;
    PetscInt maxleaf = PETSC_MIN_INT;
    int      contiguous = 1;
    for (i=0; i<nleaves; i++) {
      minleaf = PetscMin(minleaf,ilocal[i]);
      maxleaf = PetscMax(maxleaf,ilocal[i]);
      contiguous &= (ilocal[i] == i);
    }
    sf->minleaf = minleaf;
    sf->maxleaf = maxleaf;
    if (contiguous) {
      /* ilocal is redundant; if we own it, free it now since it will not be stored */
      if (localmode == PETSC_OWN_POINTER) {
        ierr = PetscFree(ilocal);CHKERRQ(ierr);
      }
      ilocal = NULL;
    }
  } else {
    /* Contiguous leaves: range is [0,nleaves); empty graph yields maxleaf = -1 */
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
  }

  if (ilocal) {
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
      sf->mine = sf->mine_alloc;
      break;
    case PETSC_OWN_POINTER:
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      /* Caller retains ownership; mine_alloc stays NULL so Reset() will not free it */
      sf->mine_alloc = NULL;
      sf->mine       = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }

  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    sf->remote_alloc = NULL;
    sf->remote       = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
  PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

  Collective

  Input Parameters:
+ sf      - The PetscSF
. map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
- pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

  Notes:
  It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
  n and N are local and global sizes of x respectively.

  With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
  sequential vectors y on all ranks.

  With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
  sequential vector y on rank 0.

  In above cases, entries of x are roots and entries of y are leaves.

  With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
  creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
  of rank j.
  Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
  not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data
  items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

  In this case, roots and leaves are symmetric.

  Level: intermediate
 @*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* map is ignored for ALLTOALL; build the canonical layout of size entries per rank */
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it
       (max of n and max of -n agree in magnitude iff all n are equal) */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots  = n;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves in a gather */
    sf->nleaves = rank ? 0 : N;
    sf->nroots  = n;
    sf->nranks  = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves = size;
    sf->nroots  = size;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

   Collective

   Input Arguments:

.  sf - star forest to invert

   Output Arguments:
.  isf - inverse of sf
   Level: advanced

   Notes:
   All roots must have degree 1.

   The local space may be a permutation, but cannot be sparse.
.seealso: PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  /* Each leaf records its own (rank,index) so the reduce below tells each root
     where its (unique, by assumption) leaf lives */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* Roots start at (-1,-1); entries still negative after the reduce received no leaf */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL;
  else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  /* newilocal ownership transfers to *isf; roots is copied, then freed with leaves below */
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Arguments:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Arguments:
.  newsf - new communication object

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      /* Patterned graphs are reconstructed from the stored layout and pattern */
      ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
  /* Let the implementation copy any type-specific state */
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGraph - Get the graph specifying a parallel star forest

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers
-  iremote - remote locations of root vertices for each leaf on the current process

   Notes:
   We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet

   When called from Fortran, the returned iremote array is a copy and must deallocated after use. Consequently, if you
   want to update the graph, you must call PetscSFSetGraph after modifying the iremote array.

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->GetGraph) {
    /* Patterned implementations synthesize the graph on demand */
    ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
  } else {
    if (nroots) *nroots = sf->nroots;
    if (nleaves) *nleaves = sf->nleaves;
    if (ilocal) *ilocal = sf->mine;
    if (iremote) *iremote = sf->remote;
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetLeafRange - Get the active leaf ranges

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  minleaf - minimum active leaf on this process.
   Return 0 if there are no leaves.
-  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.

   Level: developer

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFViewFromOptions - View from Options

   Collective on PetscSF

   Input Parameters:
+  A - the star forest
.  obj - Optional object
-  name - command line option

   Level: intermediate
.seealso:  PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
  ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFView - view a star forest

   Collective

   Input Arguments:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Ensure rank/offset arrays exist before printing detail below */
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        /* NULL sf->mine means leaves are stored contiguously, so leaf i lives at location i */
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        /* Sort a scratch copy of the rank list so output is in ascending rank order */
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
.  roffset - offset in rmine/rremote for each rank (length nranks+1)
.
  rmine - concatenated array holding local indices referencing each remote rank
-  rremote - concatenated array holding remote indices referenced for each remote rank

   Level: developer

.seealso: PetscSFGetLeafRanks()
@*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetRootRanks) {
    ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
  } else {
    /* The generic implementation */
    if (nranks)  *nranks  = sf->nranks;
    if (ranks)   *ranks   = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine)   *rmine   = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  niranks - number of leaf ranks referencing roots on this process
.  iranks - array of ranks
.  ioffset - offset in irootloc for each rank (length niranks+1)
-  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetLeafRanks) {
    ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
  } else {
    /* No generic fallback: leaf-rank info requires type-specific two-sided setup */
    PetscSFType type;
    ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
  }
  PetscFunctionReturn(0);
}

/* Linear membership test: PETSC_TRUE iff needle appears in list[0..n).
   Used below on the (expected small) distinguished-rank list. */
static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
  PetscInt i;
  for (i=0; i<n; i++) {
    if (needle == list[i]) return PETSC_TRUE;
  }
  return PETSC_FALSE;
}

/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.
   Collective

   Input Arguments:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  /* Count, via a hash table, how many of this process's leaves reference each remote rank */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--; /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group   group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate the distinguished group's ranks into ranks in the SF's communicator */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished,
     using a two-pointer swap sweep from both ends */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i; ) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for ( ; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort the two partitions independently so each is ordered by rank */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* Build CSR-style offsets into rmine/rremote: one contiguous span of leaf entries per remote rank */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0; /* reused below as a per-rank fill cursor */
  }
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Argument:
.  sf - star forest

   Output Arguments:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) { /* Lazily build and cache the incoming group */
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks: build a one-root SF over the referenced ranks and take its degree */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);

    /* Enumerate the incoming ranks: each connected process gathers its rank id to me */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) { /* Outgoing group is simply the set of ranks this process references */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Argument:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Arguments:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.
.seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
    ierr   = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    *multi = sf->multi;
    PetscFunctionReturn(0);
  }
  if (!sf->multi) { /* Lazily build and cache the multi-SF */
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    /* inoffset[] is the prefix sum of root degrees: the slot range [inoffset[i],inoffset[i+1]) belongs to root i */
    ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Fetch-and-add of ones hands each leaf a distinct slot index at its root */
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
#if 0
#if defined(PETSC_USE_DEBUG) /* Check that the expected number of increments occurred */
    for (i=0; i<sf->nroots; i++) {
      if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
    }
#endif
#endif
    /* Retarget each leaf at the slot it was assigned, producing a graph where every root has degree one */
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) {        /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      /* Reduce each leaf's owning rank into its assigned slot so every root knows who filled each slot */
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Broadcast each leaf its rank-sorted slot index and rebuild the multi graph with it */
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
    }
    ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices

   Collective

   Input Arguments:
+  sf - original star forest
.  nselected  - number of selected roots on this process
-  selected   - indices of the selected roots on this process

   Output Arguments:
.  newsf - new star forest

   Level: advanced

   Note:
   To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
   be done by calling PetscSFGetGraph().

.seealso: PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  PetscInt          i,n,*roots,*rootdata,*leafdata,nroots,nleaves,connected_leaves,*new_ilocal;
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  PetscSF           tmpsf;
  MPI_Comm          comm;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (nselected) PetscValidPointer(selected,3);
  PetscValidPointer(newsf,4);

  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
  /* Uniq selected[] and put results in roots[] */
  /* NOTE(review): PetscSortedRemoveDupsInt() assumes selected[] is already sorted, and the
     min/max range check below relies on the same -- confirm callers pass sorted indices */
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscMalloc1(nselected,&roots);CHKERRQ(ierr);
  ierr = PetscArraycpy(roots,selected,nselected);CHKERRQ(ierr);
  ierr = PetscSortedRemoveDupsInt(&nselected,roots);CHKERRQ(ierr);
  if (nselected && (roots[0] < 0 || roots[nselected-1] >= sf->nroots)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %D/%D are not in [0,%D)",roots[0],roots[nselected-1],sf->nroots);

  if (sf->ops->CreateEmbeddedSF) {
    ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,roots,newsf);CHKERRQ(ierr);
  } else {
    /* A generic version of creating embedded sf. Note that we called PetscSFSetGraph() twice, which is certainly expensive */
    /* Find out which leaves (not leaf data items) are still connected to roots in the embedded sf */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,NULL,&iremote);CHKERRQ(ierr);
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&tmpsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(tmpsf,nroots,nleaves,NULL/*contiguous*/,PETSC_USE_POINTER,iremote,PETSC_USE_POINTER);CHKERRQ(ierr);
    ierr = PetscCalloc2(nroots,&rootdata,nleaves,&leafdata);CHKERRQ(ierr);
    /* Mark selected roots, then broadcast the marks so each leaf learns whether its root survives */
    for (i=0; i<nselected; ++i) rootdata[roots[i]] = 1;
    ierr = PetscSFBcastBegin(tmpsf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(tmpsf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&tmpsf);CHKERRQ(ierr);

    /* Build newsf with leaves that are still connected */
    connected_leaves = 0;
    for (i=0; i<nleaves; ++i) connected_leaves += leafdata[i];
    ierr = PetscMalloc1(connected_leaves,&new_ilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(connected_leaves,&new_iremote);CHKERRQ(ierr);
    for (i=0, n=0; i<nleaves; ++i) {
      if (leafdata[i]) {
        new_ilocal[n]        = sf->mine ? sf->mine[i] : i;
        new_iremote[n].rank  = sf->remote[i].rank;
        new_iremote[n].index = sf->remote[i].index;
        ++n;
      }
    }

    if (n != connected_leaves) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"There is a size mismatch in the SF embedding, %d != %d",n,connected_leaves);
    ierr = PetscSFCreate(comm,newsf);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(*newsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(*newsf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
  }
  ierr = PetscFree(roots);CHKERRQ(ierr);
  ierr = PetscSFSetUp(*newsf);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
  PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices

  Collective

  Input Arguments:
+ sf - original star forest
. nselected  - number of selected leaves on this process
- selected   - indices of the selected leaves on this process

  Output Arguments:
.
 newsf - new star forest

  Level: advanced

.seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  const PetscInt    *ilocal;
  PetscInt          i,nroots,*leaves,*new_ilocal;
  MPI_Comm          comm;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (nselected) PetscValidPointer(selected,3);
  PetscValidPointer(newsf,4);

  /* Uniq selected[] and put results in leaves[] */
  /* NOTE(review): PetscSortedRemoveDupsInt() assumes selected[] is already sorted, and the
     min/max range check below relies on the same -- confirm callers pass sorted indices */
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr);
  ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr);
  ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr);
  if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);

  /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
  if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
    ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr);
  } else {
    /* Generic path: copy the graph entries of the selected leaves only */
    ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
    ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
    for (i=0; i<nselected; ++i) {
      const PetscInt l     = leaves[i];
      new_ilocal[i]        = ilocal ? ilocal[l] : l;
      new_iremote[i].rank  = iremote[l].rank;
      new_iremote[i].index = iremote[l].index;
    }
    ierr = PetscSFCreate(comm,newsf);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(*newsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  }
  ierr = PetscFree(leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd()

   Collective on PetscSF

   Input Arguments:
+  sf - star forest on which to communicate
.  unit - data type associated with each node
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Arguments:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* Shall we assume rootdata, leafdata are ready to use (instead of being computed by some asynchronous kernels)?
     To be similar to MPI, I'd like to have this assumption, since MPI does not have a concept of stream.
     But currently this assumption is not enforced in Petsc. To be safe, I do synchronization here. Otherwise, if
     we do not sync now and call the Pack kernel directly on the default NULL stream (assume petsc objects are also
     computed on it), we have to sync the NULL stream before calling MPI routines. So, it looks a cudaDeviceSynchronize
     is inevitable. We do it now and put pack/unpack kernels to non-NULL streams.
   */
  if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);}
#endif
  ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Arguments:
.
leafdata - buffer to be reduced with values from each leaf's respective root 1355 1356 Level: intermediate 1357 1358 .seealso: PetscSFSetGraph(), PetscSFReduceEnd() 1359 @*/ 1360 PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1361 { 1362 PetscErrorCode ierr; 1363 PetscMemType rootmtype,leafmtype; 1364 1365 PetscFunctionBegin; 1366 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1367 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1368 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1369 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1370 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1371 ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 1372 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1373 PetscFunctionReturn(0); 1374 } 1375 1376 /*@C 1377 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 1378 1379 Collective 1380 1381 Input Arguments: 1382 + sf - star forest 1383 . unit - data type 1384 . leafdata - values to reduce 1385 - op - reduction operation 1386 1387 Output Arguments: 1388 . 
rootdata - result of reduction of values from all leaves of each root 1389 1390 Level: intermediate 1391 1392 .seealso: PetscSFBcastBegin() 1393 @*/ 1394 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1395 { 1396 PetscErrorCode ierr; 1397 PetscMemType rootmtype,leafmtype; 1398 1399 PetscFunctionBegin; 1400 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1401 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1402 ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1403 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1404 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1405 #if defined(PETSC_HAVE_CUDA) 1406 if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);} 1407 #endif 1408 ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr); 1409 ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1410 PetscFunctionReturn(0); 1411 } 1412 1413 /*@C 1414 PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin() 1415 1416 Collective 1417 1418 Input Arguments: 1419 + sf - star forest 1420 . unit - data type 1421 . leafdata - values to reduce 1422 - op - reduction operation 1423 1424 Output Arguments: 1425 . 
rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscMemType   rmtype,lmtype;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
  /* Query each buffer's memory kind, then finish the reduction via the type-specific implementation */
  ierr = PetscGetMemType(leafdata,&lmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rmtype);CHKERRQ(ierr);
  ierr = (*sf->ops->ReduceEnd)(sf,unit,lmtype,leafdata,rmtype,rootdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Arguments:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
   integers.
1469 1470 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph() 1471 @*/ 1472 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1473 { 1474 PetscErrorCode ierr; 1475 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1476 1477 PetscFunctionBegin; 1478 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1479 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1480 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1481 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1482 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1483 ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr); 1484 if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1485 #if defined(PETSC_HAVE_CUDA) 1486 if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);} 1487 #endif 1488 ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr); 1489 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1490 PetscFunctionReturn(0); 1491 } 1492 1493 /*@C 1494 PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value 1495 1496 Collective 1497 1498 Input Arguments: 1499 + sf - star forest 1500 . unit - data type 1501 . 
leafdata - leaf values to use in reduction 1502 - op - operation to use for reduction 1503 1504 Output Arguments: 1505 + rootdata - root values to be updated, input state is seen by first process to perform an update 1506 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1507 1508 Level: advanced 1509 1510 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph() 1511 @*/ 1512 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1513 { 1514 PetscErrorCode ierr; 1515 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1516 1517 PetscFunctionBegin; 1518 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1519 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1520 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1521 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1522 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1523 ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr); 1524 if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1525 ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr); 1526 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1527 PetscFunctionReturn(0); 1528 } 1529 1530 /*@C 1531 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd() 1532 1533 Collective 1534 1535 Input Arguments: 1536 . sf - star forest 1537 1538 Output Arguments: 1539 . degree - degree of each root vertex 1540 1541 Level: advanced 1542 1543 Notes: 1544 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 
1545 1546 .seealso: PetscSFGatherBegin() 1547 @*/ 1548 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree) 1549 { 1550 PetscErrorCode ierr; 1551 1552 PetscFunctionBegin; 1553 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1554 PetscSFCheckGraphSet(sf,1); 1555 PetscValidPointer(degree,2); 1556 if (!sf->degreeknown) { 1557 PetscInt i, nroots = sf->nroots, maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 1558 if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1559 ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr); 1560 ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1561 for (i=0; i<nroots; i++) sf->degree[i] = 0; 1562 for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1; 1563 ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr); 1564 } 1565 *degree = NULL; 1566 PetscFunctionReturn(0); 1567 } 1568 1569 /*@C 1570 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin() 1571 1572 Collective 1573 1574 Input Arguments: 1575 . sf - star forest 1576 1577 Output Arguments: 1578 . degree - degree of each root vertex 1579 1580 Level: developer 1581 1582 Notes: 1583 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 
1584 1585 .seealso: 1586 @*/ 1587 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree) 1588 { 1589 PetscErrorCode ierr; 1590 1591 PetscFunctionBegin; 1592 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1593 PetscSFCheckGraphSet(sf,1); 1594 PetscValidPointer(degree,2); 1595 if (!sf->degreeknown) { 1596 if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 1597 ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr); 1598 ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr); 1599 sf->degreeknown = PETSC_TRUE; 1600 } 1601 *degree = sf->degree; 1602 PetscFunctionReturn(0); 1603 } 1604 1605 1606 /*@C 1607 PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()). 1608 Each multi-root is assigned index of the corresponding original root. 1609 1610 Collective 1611 1612 Input Arguments: 1613 + sf - star forest 1614 - degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd() 1615 1616 Output Arguments: 1617 + nMultiRoots - (optional) number of multi-roots (roots of multi-SF) 1618 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots 1619 1620 Level: developer 1621 1622 Notes: 1623 The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed. 
1624 1625 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF() 1626 @*/ 1627 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1628 { 1629 PetscSF msf; 1630 PetscInt i, j, k, nroots, nmroots; 1631 PetscErrorCode ierr; 1632 1633 PetscFunctionBegin; 1634 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1635 ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 1636 if (nroots) PetscValidIntPointer(degree,2); 1637 if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3); 1638 PetscValidPointer(multiRootsOrigNumbering,4); 1639 ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr); 1640 ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr); 1641 ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr); 1642 for (i=0,j=0,k=0; i<nroots; i++) { 1643 if (!degree[i]) continue; 1644 for (j=0; j<degree[i]; j++,k++) { 1645 (*multiRootsOrigNumbering)[k] = i; 1646 } 1647 } 1648 #if defined(PETSC_USE_DEBUG) 1649 if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail"); 1650 #endif 1651 if (nMultiRoots) *nMultiRoots = nmroots; 1652 PetscFunctionReturn(0); 1653 } 1654 1655 /*@C 1656 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 1657 1658 Collective 1659 1660 Input Arguments: 1661 + sf - star forest 1662 . unit - data type 1663 - leafdata - leaf data to gather to roots 1664 1665 Output Argument: 1666 . 
multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Gather is a reduce over the multi-SF, where every leaf has its own dedicated multi-root slot */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Argument:
.  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  /* Complete the reduce started in PetscSFGatherBegin() */
  ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.
leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Scatter is a broadcast over the multi-SF: each leaf receives its personal copy from its own multi-root slot */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  /* Complete the broadcast started in PetscSFScatterBegin() */
  ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Verify (debug builds only) that all leaves of sf are unique, i.e. sf is a true star forest.
   In optimized builds this is a no-op. */
static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
{
#if defined(PETSC_USE_DEBUG)
  PetscInt        i, n, nleaves;
  const PetscInt *ilocal = NULL;
  PetscHSetI      seen;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  ierr =
PetscHSetICreate(&seen);CHKERRQ(ierr);
  for (i = 0; i < nleaves; i++) {
    /* ilocal == NULL means contiguous leaf storage, i.e. leaf i is simply i */
    const PetscInt leaf = ilocal ? ilocal[i] : i;
    ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
  }
  /* any duplicate leaf makes the set strictly smaller than the leaf count */
  ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
  if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
  ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
  PetscFunctionReturn(0);
#else
  PetscFunctionBegin;
  PetscFunctionReturn(0);
#endif
}

/*@
  PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots.

  sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
  nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
  Bcast on sfA, then a Bcast on sfB, on connected nodes.
.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode     ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt           i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool          denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  if (localPointsA) {
    /* Reorder sfA's remote points into sfB's root numbering; rank == -1 marks roots of sfB
       that no leaf of sfA maps to */
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* sfA leaf outside sfB's root space: no path */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  /* Push sfA's root locations down to sfB's leaves. leafdataB is shifted by minleaf so that
     only the occupied leaf range [minleaf,maxleaf] needs to be allocated */
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* The composite SF can use contiguous (dense) leaf storage only if sfB stores contiguous
     leaves and every one of them received a valid root; count connected leaves along the way */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership transferred to the new SF via PETSC_OWN_POINTER below */
  } else {
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue; /* unconnected leaf: not part of sfBA */
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF.

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
  second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

  sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
  roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
  a Reduce on sfB, on connected roots.
.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode     ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt           i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);
  /* TODO: Check roots of sfB have degree of 1 */

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  /* Scatter sfA's remote points into an array addressed by sfB leaf index (shifted by minleaf);
     rank == -1 marks leaf slots of sfB that receive nothing from sfA */
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; /* outside sfB's leaf range */
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* contiguous leaf storage in sfA: leaf i is i */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Once we implement the TODO above (check all roots of sfB have degree of 1), we can replace the MPI_MAXLOC
     with MPIU_REPLACE. In that case, MPI_MAXLOC and MPIU_REPLACE have the same effect.
  */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,MPI_MAXLOC);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,MPI_MAXLOC);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* Compact in place: keep only roots of sfB that are connected to some root of sfA */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
.
out - The local PetscSF
 */
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm           comm;
  PetscMPIInt        myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode       *liremote;
  PetscSF            lsf;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->CreateLocalSF) {
    /* A type-specific implementation takes precedence over the generic path below */
    ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
  } else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr);

    /* Find out local edges and build a local SF */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
    /* First pass: count intra-process edges so the arrays can be sized exactly */
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);

    /* Second pass: copy the local edges, remapping the owner rank to 0 on PETSC_COMM_SELF */
    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
    *out = lsf;
  }
  PetscFunctionReturn(0);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  if (sf->ops->BcastToZero) {
    ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  /* NOTE(review): the Begin event (PETSCSF_BcastAndOpBegin) is logged for both begin and end
     here -- confirm this is intentional rather than a copy-paste of the Begin event id */
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}