#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/hashseti.h>
#include <petscctable.h>

#if defined(PETSC_USE_DEBUG)
/* Debug-only guard: error out if the SF's graph has not been set yet (no-op in optimized builds) */
#  define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                              \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif

const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0};

/*@
   PetscSFCreate - create a star forest communication context

   Collective

   Input Arguments:
.  comm - communicator on which the star forest will operate

   Output Arguments:
.  sf - new star forest context

   Options Database Keys:
+  -sf_type basic    - Use MPI persistent Isend/Irecv for communication (Default)
.  -sf_type window   - Use MPI-3 one-sided window for communication
-  -sf_type neighbor - Use MPI-3 neighborhood collectives for communication

   Level: intermediate

   Notes:
   When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
   MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
   SFs are optimized and they have better performance than general SFs.

.seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
{
  PetscErrorCode ierr;
  PetscSF        b;

  PetscFunctionBegin;
  PetscValidPointer(sf,2);
  ierr = PetscSFInitializePackage();CHKERRQ(ierr);

  ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);

  /* Sentinel values (-1 counts, inverted min/max leaf range) mark the graph as not set yet */
  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_MAX_INT;
  b->maxleaf   = PETSC_MIN_INT;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;

  *sf = b;
  PetscFunctionReturn(0);
}

/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Arguments:
.  sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Give the type-specific implementation a chance to release its private data first */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  /* Return the graph fields to their not-set sentinels (see PetscSFCreate) */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  if (sf->rmine_d) {cudaError_t err = cudaFree(sf->rmine_d);CHKERRCUDA(err);sf->rmine_d=NULL;}
#endif
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
  ierr =
PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFSetType - Set the PetscSF communication implementation

   Collective on PetscSF

   Input Parameters:
+  sf - the PetscSF context
-  type - a known method

   Options Database Key:
.  -sf_type <type> - Sets the method; use -help for a list
   of available methods (for instance, window, pt2pt, neighbor)

   Notes:
   See "include/petscsf.h" for available methods (for instance)
+    PETSCSFWINDOW - MPI-2/3 one-sided
-    PETSCSFBASIC - basic implementation using MPI-1 two-sided

   Level: intermediate

.seealso: PetscSFType, PetscSFCreate()
@*/
PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
{
  PetscErrorCode ierr,(*r)(PetscSF);
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidCharPointer(type,2);

  /* No-op if the SF already has the requested type */
  ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
  if (match) PetscFunctionReturn(0);

  ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
  if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
  /* Destroy the previous PetscSF implementation context */
  if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
  ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
  /* Invoke the constructor for the new type; it fills sf->ops */
  ierr = (*r)(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetType - Get the PetscSF communication implementation

   Not Collective

   Input Parameter:
.  sf - the PetscSF context

   Output Parameter:
.  type - the PetscSF type name

   Level: intermediate

.seealso: PetscSFSetType(), PetscSFCreate()
@*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
  PetscValidPointer(type,2);
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}

/*@
   PetscSFDestroy - destroy star forest

   Collective

   Input Arguments:
.  sf - address of star forest

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFReset()
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Only the last reference actually destroys the object */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Debug-only sanity check of the graph: each leaf must reference a valid remote rank,
   a nonnegative remote root index, and a nonnegative local leaf location. */
static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
{
#if defined(PETSC_USE_DEBUG)
  PetscInt           i, nleaves;
  PetscMPIInt        size;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  if (!sf->graphset) PetscFunctionReturn(0);
  ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  for (i = 0; i < nleaves; i++) {
    const PetscInt rank   = iremote[i].rank;
    const PetscInt remote = iremote[i].index;
    /* NULL ilocal means contiguous leaf storage: leaf location i */
    const PetscInt leaf   = ilocal ?
ilocal[i] : i;
    if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
    if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
    if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
  }
  PetscFunctionReturn(0);
#else
  PetscFunctionBegin;
  PetscFunctionReturn(0);
#endif
}

/*@
   PetscSFSetUp - set up communication structures

   Collective

   Input Arguments:
.  sf - star forest communication object

   Level: beginner

.seealso: PetscSFSetFromOptions(), PetscSFSetType()
@*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (sf->setupcalled) PetscFunctionReturn(0);
  ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr);
  /* Default to the basic implementation if the caller never chose a type */
  if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);}
  ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
  ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetFromOptions - set PetscSF options using the options database

   Logically Collective

   Input Arguments:
.  sf - star forest

   Options Database Keys:
+  -sf_type - implementation type, see PetscSFSetType()
-  -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise

   Level: intermediate

.seealso: PetscSFWindowSetSyncType()
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
  /* Keep the current type as default if one was already set */
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
  if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

   Logically Collective

   Input Arguments:
+  sf - star forest
-  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

   Level: advanced

.seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  /* The multi-SF built by gather/scatter bakes in the ordering; it cannot change afterwards */
  if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Arguments:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
during setup in debug mode)
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
during setup in debug mode)
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
   In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode

   Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
   encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
   needed

   Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
   indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).

.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
  if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

  /* Setting a new graph discards any previously set graph and communication structures */
  ierr = PetscSFReset(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  if (nleaves && ilocal) {
    /* Compute leaf range and detect identity ilocal in a single pass */
    PetscInt i;
    PetscInt minleaf = PETSC_MAX_INT;
    PetscInt maxleaf = PETSC_MIN_INT;
    int      contiguous = 1;
    for (i=0; i<nleaves; i++) {
      minleaf = PetscMin(minleaf,ilocal[i]);
      maxleaf = PetscMax(maxleaf,ilocal[i]);
      contiguous &= (ilocal[i] == i);
    }
    sf->minleaf = minleaf;
    sf->maxleaf = maxleaf;
    if (contiguous) {
      /* Identity permutation encodes contiguous storage; drop ilocal (freeing it if ownership was transferred) */
      if (localmode == PETSC_OWN_POINTER) {
        ierr = PetscFree(ilocal);CHKERRQ(ierr);
      }
      ilocal = NULL;
    }
  } else {
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
  }

  if (ilocal) {
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
      sf->mine = sf->mine_alloc;
      break;
    case PETSC_OWN_POINTER:
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      /* Caller keeps ownership; mine_alloc stays NULL so PetscSFReset will not free it */
      sf->mine_alloc = NULL;
      sf->mine       = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }

  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    /* Caller keeps ownership; remote_alloc stays NULL so PetscSFReset will not free it */
    sf->remote_alloc = NULL;
    sf->remote       = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
  PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

  Collective

  Input Parameters:
+ sf      - The PetscSF
. map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
- pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

  Notes:
  It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
  n and N are local and global sizes of x respectively.

  With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
  sequential vectors y on all ranks.

  With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
  sequential vector y on rank 0.

  In above cases, entries of x are roots and entries of y are leaves.

  With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
  creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
  of rank j.
Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
  not mean one can not send multiple items. One just needs to create a new MPI datatype for the multiple data
  items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

  In this case, roots and leaves are symmetric.

  Level: intermediate
 @*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* Alltoall ignores the user's map: build a fixed layout with size entries per rank */
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr   = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr   = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
    } else {
      /* Unequal local sizes need the "v" (varying-count) collectives */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots  = n;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves for GATHER */
    sf->nleaves = rank ? 0 : N;
    sf->nroots  = n;
    sf->nranks  = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves = size;
    sf->nroots  = size;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

   Collective

   Input Arguments:

.  sf - star forest to invert

   Output Arguments:
.  isf - inverse of sf

   Level: advanced

   Notes:
   All roots must have degree 1.

   The local space may be a permutation, but cannot be sparse.

.seealso: PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  /* Tag every leaf slot with its own (rank,index) so the reduction tells each root who references it */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* Roots start at (-1,-1); entries still negative after the reduction were never referenced */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL;
  else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Arguments:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Arguments:
.  newsf - new communication object

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      /* General graph: deep-copy the leaf/root connectivity */
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      /* Patterned graph: recreate from the pattern and layout */
      ierr =
PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
  /* Let the implementation duplicate its own private data */
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGraph - Get the graph specifying a parallel star forest

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers
-  iremote - remote locations of root vertices for each leaf on the current process

   Notes:
   We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Patterned implementations may synthesize the graph on demand */
  if (sf->ops->GetGraph) {
    ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
  } else {
    if (nroots)  *nroots  = sf->nroots;
    if (nleaves) *nleaves = sf->nleaves;
    if (ilocal)  *ilocal  = sf->mine;
    if (iremote) *iremote = sf->remote;
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetLeafRange - Get the active leaf ranges

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
-  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.

   Level: developer

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFViewFromOptions - View from Options

   Collective on PetscSF

   Input Parameters:
+  A - the star forest
.  obj - Optional object
-  name - command line option

   Level: intermediate

.seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
  ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFView - view a star forest

   Collective

   Input Arguments:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Set up first so rank data (sf->ranks etc.) is available for detailed output */
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt
ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      /* One "leaf <- (rank,rootindex)" line per leaf; NULL sf->mine means contiguous leaf i */
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        /* Sort a copy of the rank list so detailed output is in ascending rank order */
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
.  roffset - offset in rmine/rremote for each rank (length nranks+1)
.  rmine - concatenated array holding local indices referencing each remote rank
-  rremote - concatenated array holding remote indices referenced for each remote rank

   Level: developer

.seealso: PetscSFGetLeafRanks()
@*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetRootRanks) {
    ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
  } else {
    /* The generic implementation */
    if (nranks)  *nranks  = sf->nranks;
    if (ranks)   *ranks   = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine)   *rmine   = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  niranks - number of leaf ranks referencing roots on this process
.  iranks - array of ranks
.
ioffset - offset in irootloc for each rank (length niranks+1)
-  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  /* No generic fallback: only types that implement GetLeafRanks support this query */
  if (sf->ops->GetLeafRanks) {
    ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
  } else {
    PetscSFType type;
    ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
  }
  PetscFunctionReturn(0);
}

/* Linear membership test: PETSC_TRUE iff needle appears among the first n entries of list */
static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
  PetscInt i;
  for (i=0; i<n; i++) {
    if (needle == list[i]) return PETSC_TRUE;
  }
  return PETSC_FALSE;
}

/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

   Collective

   Input Arguments:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  /* Count the number of leaves referencing each remote rank via a hash table */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--; /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group   group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate the distinguished group's ranks into this communicator's numbering */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i; ) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for ( ; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition by rank; PetscFindMPIInt below relies on this ordering */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* Build CSR-style offsets; reuse rcount[] as per-rank fill cursors below */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;
  }
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Argument:
.  sf - star forest

   Output Arguments:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);

    /* Enumerate the incoming ranks */
    ierr =
PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr); 1030 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 1031 for (i=0; i<sf->nranks; i++) outranks[i] = rank; 1032 ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr); 1033 ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr); 1034 ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr); 1035 ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr); 1036 ierr = MPI_Group_free(&group);CHKERRQ(ierr); 1037 ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr); 1038 ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr); 1039 } 1040 *incoming = sf->ingroup; 1041 1042 if (sf->outgroup == MPI_GROUP_NULL) { 1043 ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr); 1044 ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr); 1045 ierr = MPI_Group_free(&group);CHKERRQ(ierr); 1046 } 1047 *outgoing = sf->outgroup; 1048 PetscFunctionReturn(0); 1049 } 1050 1051 /*@ 1052 PetscSFGetMultiSF - gets the inner SF implemeting gathers and scatters 1053 1054 Collective 1055 1056 Input Argument: 1057 . sf - star forest that may contain roots with 0 or with more than 1 vertex 1058 1059 Output Arguments: 1060 . multi - star forest with split roots, such that each root has degree exactly 1 1061 1062 Level: developer 1063 1064 Notes: 1065 1066 In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi 1067 directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming 1068 edge, it is a candidate for future optimization that might involve its removal. 
1069 1070 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering() 1071 @*/ 1072 PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi) 1073 { 1074 PetscErrorCode ierr; 1075 1076 PetscFunctionBegin; 1077 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1078 PetscValidPointer(multi,2); 1079 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */ 1080 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1081 *multi = sf->multi; 1082 PetscFunctionReturn(0); 1083 } 1084 if (!sf->multi) { 1085 const PetscInt *indegree; 1086 PetscInt i,*inoffset,*outones,*outoffset,maxlocal; 1087 PetscSFNode *remote; 1088 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 1089 ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr); 1090 ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr); 1091 ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr); 1092 inoffset[0] = 0; 1093 for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i]; 1094 for (i=0; i<maxlocal; i++) outones[i] = 1; 1095 ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1096 ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1097 for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1098 #if 0 1099 #if defined(PETSC_USE_DEBUG) /* Check that the expected number of increments occurred */ 1100 for (i=0; i<sf->nroots; i++) { 1101 if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp"); 1102 } 1103 #endif 1104 #endif 1105 ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr); 1106 for (i=0; i<sf->nleaves; i++) { 1107 remote[i].rank = sf->remote[i].rank; 1108 remote[i].index = outoffset[sf->mine ? 
sf->mine[i] : i]; 1109 } 1110 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1111 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1112 if (sf->rankorder) { /* Sort the ranks */ 1113 PetscMPIInt rank; 1114 PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree; 1115 PetscSFNode *newremote; 1116 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 1117 for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]); 1118 ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr); 1119 for (i=0; i<maxlocal; i++) outranks[i] = rank; 1120 ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr); 1121 ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr); 1122 /* Sort the incoming ranks at each vertex, build the inverse map */ 1123 for (i=0; i<sf->nroots; i++) { 1124 PetscInt j; 1125 for (j=0; j<indegree[i]; j++) tmpoffset[j] = j; 1126 ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr); 1127 for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1128 } 1129 ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr); 1130 ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr); 1131 ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr); 1132 for (i=0; i<sf->nleaves; i++) { 1133 newremote[i].rank = sf->remote[i].rank; 1134 newremote[i].index = newoutoffset[sf->mine ? 
sf->mine[i] : i]; 1135 } 1136 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1137 ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr); 1138 } 1139 ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr); 1140 } 1141 *multi = sf->multi; 1142 PetscFunctionReturn(0); 1143 } 1144 1145 /*@C 1146 PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices 1147 1148 Collective 1149 1150 Input Arguments: 1151 + sf - original star forest 1152 . nselected - number of selected roots on this process 1153 - selected - indices of the selected roots on this process 1154 1155 Output Arguments: 1156 . newsf - new star forest 1157 1158 Level: advanced 1159 1160 Note: 1161 To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can 1162 be done by calling PetscSFGetGraph(). 1163 1164 .seealso: PetscSFSetGraph(), PetscSFGetGraph() 1165 @*/ 1166 PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1167 { 1168 PetscInt i,n,*roots,*rootdata,*leafdata,nroots,nleaves,connected_leaves,*new_ilocal; 1169 const PetscSFNode *iremote; 1170 PetscSFNode *new_iremote; 1171 PetscSF tmpsf; 1172 MPI_Comm comm; 1173 PetscErrorCode ierr; 1174 1175 PetscFunctionBegin; 1176 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1177 PetscSFCheckGraphSet(sf,1); 1178 if (nselected) PetscValidPointer(selected,3); 1179 PetscValidPointer(newsf,4); 1180 1181 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1182 1183 /* Uniq selected[] and put results in roots[] */ 1184 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1185 ierr = PetscMalloc1(nselected,&roots);CHKERRQ(ierr); 1186 ierr = PetscArraycpy(roots,selected,nselected);CHKERRQ(ierr); 1187 ierr = PetscSortedRemoveDupsInt(&nselected,roots);CHKERRQ(ierr); 1188 if (nselected && (roots[0] < 0 
|| roots[nselected-1] >= sf->nroots)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %D/%D are not in [0,%D)",roots[0],roots[nselected-1],sf->nroots); 1189 1190 if (sf->ops->CreateEmbeddedSF) { 1191 ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,roots,newsf);CHKERRQ(ierr); 1192 } else { 1193 /* A generic version of creating embedded sf. Note that we called PetscSFSetGraph() twice, which is certainly expensive */ 1194 /* Find out which leaves (not leaf data items) are still connected to roots in the embedded sf */ 1195 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,NULL,&iremote);CHKERRQ(ierr); 1196 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&tmpsf);CHKERRQ(ierr); 1197 ierr = PetscSFSetGraph(tmpsf,nroots,nleaves,NULL/*contiguous*/,PETSC_USE_POINTER,iremote,PETSC_USE_POINTER);CHKERRQ(ierr); 1198 ierr = PetscCalloc2(nroots,&rootdata,nleaves,&leafdata);CHKERRQ(ierr); 1199 for (i=0; i<nselected; ++i) rootdata[roots[i]] = 1; 1200 ierr = PetscSFBcastBegin(tmpsf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr); 1201 ierr = PetscSFBcastEnd(tmpsf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr); 1202 ierr = PetscSFDestroy(&tmpsf);CHKERRQ(ierr); 1203 1204 /* Build newsf with leaves that are still connected */ 1205 connected_leaves = 0; 1206 for (i=0; i<nleaves; ++i) connected_leaves += leafdata[i]; 1207 ierr = PetscMalloc1(connected_leaves,&new_ilocal);CHKERRQ(ierr); 1208 ierr = PetscMalloc1(connected_leaves,&new_iremote);CHKERRQ(ierr); 1209 for (i=0, n=0; i<nleaves; ++i) { 1210 if (leafdata[i]) { 1211 new_ilocal[n] = sf->mine ? 
sf->mine[i] : i; 1212 new_iremote[n].rank = sf->remote[i].rank; 1213 new_iremote[n].index = sf->remote[i].index; 1214 ++n; 1215 } 1216 } 1217 1218 if (n != connected_leaves) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"There is a size mismatch in the SF embedding, %d != %d",n,connected_leaves); 1219 ierr = PetscSFCreate(comm,newsf);CHKERRQ(ierr); 1220 ierr = PetscSFSetFromOptions(*newsf);CHKERRQ(ierr); 1221 ierr = PetscSFSetGraph(*newsf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1222 ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr); 1223 } 1224 ierr = PetscFree(roots);CHKERRQ(ierr); 1225 PetscFunctionReturn(0); 1226 } 1227 1228 /*@C 1229 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1230 1231 Collective 1232 1233 Input Arguments: 1234 + sf - original star forest 1235 . nselected - number of selected leaves on this process 1236 - selected - indices of the selected leaves on this process 1237 1238 Output Arguments: 1239 . 
newsf - new star forest 1240 1241 Level: advanced 1242 1243 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph() 1244 @*/ 1245 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1246 { 1247 const PetscSFNode *iremote; 1248 PetscSFNode *new_iremote; 1249 const PetscInt *ilocal; 1250 PetscInt i,nroots,*leaves,*new_ilocal; 1251 MPI_Comm comm; 1252 PetscErrorCode ierr; 1253 1254 PetscFunctionBegin; 1255 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1256 PetscSFCheckGraphSet(sf,1); 1257 if (nselected) PetscValidPointer(selected,3); 1258 PetscValidPointer(newsf,4); 1259 1260 /* Uniq selected[] and put results in leaves[] */ 1261 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1262 ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr); 1263 ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr); 1264 ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr); 1265 if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves); 1266 1267 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1268 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) { 1269 ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr); 1270 } else { 1271 ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr); 1272 ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr); 1273 ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr); 1274 for (i=0; i<nselected; ++i) { 1275 const PetscInt l = leaves[i]; 1276 new_ilocal[i] = ilocal ? 
ilocal[l] : l; 1277 new_iremote[i].rank = iremote[l].rank; 1278 new_iremote[i].index = iremote[l].index; 1279 } 1280 ierr = PetscSFCreate(comm,newsf);CHKERRQ(ierr); 1281 ierr = PetscSFSetFromOptions(*newsf);CHKERRQ(ierr); 1282 ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1283 } 1284 ierr = PetscFree(leaves);CHKERRQ(ierr); 1285 PetscFunctionReturn(0); 1286 } 1287 1288 /*@C 1289 PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd() 1290 1291 Collective on PetscSF 1292 1293 Input Arguments: 1294 + sf - star forest on which to communicate 1295 . unit - data type associated with each node 1296 . rootdata - buffer to broadcast 1297 - op - operation to use for reduction 1298 1299 Output Arguments: 1300 . leafdata - buffer to be reduced with values from each leaf's respective root 1301 1302 Level: intermediate 1303 1304 .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd() 1305 @*/ 1306 PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1307 { 1308 PetscErrorCode ierr; 1309 PetscMemType rootmtype,leafmtype; 1310 1311 PetscFunctionBegin; 1312 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1313 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1314 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1315 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1316 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1317 #if defined(PETSC_HAVE_CUDA) 1318 /* Shall we assume rootdata, leafdata are ready to use (instead of being computed by some asynchronous kernels)? 1319 To be similar to MPI, I'd like to have this assumption, since MPI does not have a concept of stream. 1320 But currently this assumption is not enforecd in Petsc. To be safe, I do synchronization here. 
Otherwise, if 1321 we do not sync now and call the Pack kernel directly on the default NULL stream (assume petsc objects are also 1322 computed on it), we have to sync the NULL stream before calling MPI routines. So, it looks a cudaDeviceSynchronize 1323 is inevitable. We do it now and put pack/unpack kernels to non-NULL streams. 1324 */ 1325 if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);} 1326 #endif 1327 ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 1328 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1329 PetscFunctionReturn(0); 1330 } 1331 1332 /*@C 1333 PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin() 1334 1335 Collective 1336 1337 Input Arguments: 1338 + sf - star forest 1339 . unit - data type 1340 . rootdata - buffer to broadcast 1341 - op - operation to use for reduction 1342 1343 Output Arguments: 1344 . 
leafdata - buffer to be reduced with values from each leaf's respective root 1345 1346 Level: intermediate 1347 1348 .seealso: PetscSFSetGraph(), PetscSFReduceEnd() 1349 @*/ 1350 PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1351 { 1352 PetscErrorCode ierr; 1353 PetscMemType rootmtype,leafmtype; 1354 1355 PetscFunctionBegin; 1356 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1357 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1358 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1359 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1360 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1361 ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 1362 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1363 PetscFunctionReturn(0); 1364 } 1365 1366 /*@C 1367 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 1368 1369 Collective 1370 1371 Input Arguments: 1372 + sf - star forest 1373 . unit - data type 1374 . leafdata - values to reduce 1375 - op - reduction operation 1376 1377 Output Arguments: 1378 . 
rootdata - result of reduction of values from all leaves of each root 1379 1380 Level: intermediate 1381 1382 .seealso: PetscSFBcastBegin() 1383 @*/ 1384 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1385 { 1386 PetscErrorCode ierr; 1387 PetscMemType rootmtype,leafmtype; 1388 1389 PetscFunctionBegin; 1390 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1391 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1392 ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1393 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1394 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1395 #if defined(PETSC_HAVE_CUDA) 1396 if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);} 1397 #endif 1398 ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr); 1399 ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1400 PetscFunctionReturn(0); 1401 } 1402 1403 /*@C 1404 PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin() 1405 1406 Collective 1407 1408 Input Arguments: 1409 + sf - star forest 1410 . unit - data type 1411 . leafdata - values to reduce 1412 - op - reduction operation 1413 1414 Output Arguments: 1415 . 
rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  /* Wait for the communication started in PetscSFReduceBegin() and combine into rootdata */
  ierr = (*sf->ops->ReduceEnd)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Arguments:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
   integers.
1459 1460 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph() 1461 @*/ 1462 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1463 { 1464 PetscErrorCode ierr; 1465 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1466 1467 PetscFunctionBegin; 1468 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1469 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1470 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1471 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1472 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1473 ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr); 1474 if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1475 #if defined(PETSC_HAVE_CUDA) 1476 if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);} 1477 #endif 1478 ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr); 1479 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1480 PetscFunctionReturn(0); 1481 } 1482 1483 /*@C 1484 PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value 1485 1486 Collective 1487 1488 Input Arguments: 1489 + sf - star forest 1490 . unit - data type 1491 . 
leafdata - leaf values to use in reduction 1492 - op - operation to use for reduction 1493 1494 Output Arguments: 1495 + rootdata - root values to be updated, input state is seen by first process to perform an update 1496 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1497 1498 Level: advanced 1499 1500 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph() 1501 @*/ 1502 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1503 { 1504 PetscErrorCode ierr; 1505 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1506 1507 PetscFunctionBegin; 1508 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1509 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1510 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1511 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1512 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1513 ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr); 1514 if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1515 ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr); 1516 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1517 PetscFunctionReturn(0); 1518 } 1519 1520 /*@C 1521 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd() 1522 1523 Collective 1524 1525 Input Arguments: 1526 . sf - star forest 1527 1528 Output Arguments: 1529 . degree - degree of each root vertex 1530 1531 Level: advanced 1532 1533 Notes: 1534 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 
1535 1536 .seealso: PetscSFGatherBegin() 1537 @*/ 1538 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree) 1539 { 1540 PetscErrorCode ierr; 1541 1542 PetscFunctionBegin; 1543 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1544 PetscSFCheckGraphSet(sf,1); 1545 PetscValidPointer(degree,2); 1546 if (!sf->degreeknown) { 1547 PetscInt i, nroots = sf->nroots, maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 1548 if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1549 ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr); 1550 ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1551 for (i=0; i<nroots; i++) sf->degree[i] = 0; 1552 for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1; 1553 ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr); 1554 } 1555 *degree = NULL; 1556 PetscFunctionReturn(0); 1557 } 1558 1559 /*@C 1560 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin() 1561 1562 Collective 1563 1564 Input Arguments: 1565 . sf - star forest 1566 1567 Output Arguments: 1568 . degree - degree of each root vertex 1569 1570 Level: developer 1571 1572 Notes: 1573 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 
1574 1575 .seealso: 1576 @*/ 1577 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree) 1578 { 1579 PetscErrorCode ierr; 1580 1581 PetscFunctionBegin; 1582 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1583 PetscSFCheckGraphSet(sf,1); 1584 PetscValidPointer(degree,2); 1585 if (!sf->degreeknown) { 1586 if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 1587 ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr); 1588 ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr); 1589 sf->degreeknown = PETSC_TRUE; 1590 } 1591 *degree = sf->degree; 1592 PetscFunctionReturn(0); 1593 } 1594 1595 1596 /*@C 1597 PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()). 1598 Each multi-root is assigned index of the corresponding original root. 1599 1600 Collective 1601 1602 Input Arguments: 1603 + sf - star forest 1604 - degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd() 1605 1606 Output Arguments: 1607 + nMultiRoots - (optional) number of multi-roots (roots of multi-SF) 1608 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots 1609 1610 Level: developer 1611 1612 Notes: 1613 The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed. 
1614 1615 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF() 1616 @*/ 1617 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1618 { 1619 PetscSF msf; 1620 PetscInt i, j, k, nroots, nmroots; 1621 PetscErrorCode ierr; 1622 1623 PetscFunctionBegin; 1624 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1625 ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 1626 if (nroots) PetscValidIntPointer(degree,2); 1627 if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3); 1628 PetscValidPointer(multiRootsOrigNumbering,4); 1629 ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr); 1630 ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr); 1631 ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr); 1632 for (i=0,j=0,k=0; i<nroots; i++) { 1633 if (!degree[i]) continue; 1634 for (j=0; j<degree[i]; j++,k++) { 1635 (*multiRootsOrigNumbering)[k] = i; 1636 } 1637 } 1638 #if defined(PETSC_USE_DEBUG) 1639 if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail"); 1640 #endif 1641 if (nMultiRoots) *nMultiRoots = nmroots; 1642 PetscFunctionReturn(0); 1643 } 1644 1645 /*@C 1646 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 1647 1648 Collective 1649 1650 Input Arguments: 1651 + sf - star forest 1652 . unit - data type 1653 - leafdata - leaf data to gather to roots 1654 1655 Output Argument: 1656 . 
multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1657 1658 Level: intermediate 1659 1660 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1661 @*/ 1662 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1663 { 1664 PetscErrorCode ierr; 1665 PetscSF multi; 1666 1667 PetscFunctionBegin; 1668 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1669 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1670 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1671 ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 1672 PetscFunctionReturn(0); 1673 } 1674 1675 /*@C 1676 PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin() 1677 1678 Collective 1679 1680 Input Arguments: 1681 + sf - star forest 1682 . unit - data type 1683 - leafdata - leaf data to gather to roots 1684 1685 Output Argument: 1686 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1687 1688 Level: intermediate 1689 1690 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1691 @*/ 1692 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1693 { 1694 PetscErrorCode ierr; 1695 PetscSF multi; 1696 1697 PetscFunctionBegin; 1698 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1699 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1700 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1701 ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 1702 PetscFunctionReturn(0); 1703 } 1704 1705 /*@C 1706 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd() 1707 1708 Collective 1709 1710 Input Arguments: 1711 + sf - star forest 1712 . unit - data type 1713 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1714 1715 Output Argument: 1716 . 
leafdata - leaf data to be update with personal data from each respective root 1717 1718 Level: intermediate 1719 1720 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1721 @*/ 1722 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1723 { 1724 PetscErrorCode ierr; 1725 PetscSF multi; 1726 1727 PetscFunctionBegin; 1728 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1729 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1730 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1731 ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr); 1732 PetscFunctionReturn(0); 1733 } 1734 1735 /*@C 1736 PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin() 1737 1738 Collective 1739 1740 Input Arguments: 1741 + sf - star forest 1742 . unit - data type 1743 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1744 1745 Output Argument: 1746 . leafdata - leaf data to be update with personal data from each respective root 1747 1748 Level: intermediate 1749 1750 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1751 @*/ 1752 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1753 { 1754 PetscErrorCode ierr; 1755 PetscSF multi; 1756 1757 PetscFunctionBegin; 1758 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1759 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1760 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1761 ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr); 1762 PetscFunctionReturn(0); 1763 } 1764 1765 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 1766 { 1767 #if defined(PETSC_USE_DEBUG) 1768 PetscInt i, n, nleaves; 1769 const PetscInt *ilocal = NULL; 1770 PetscHSetI seen; 1771 PetscErrorCode ierr; 1772 1773 PetscFunctionBegin; 1774 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 1775 ierr = 
PetscHSetICreate(&seen);CHKERRQ(ierr); 1776 for (i = 0; i < nleaves; i++) { 1777 const PetscInt leaf = ilocal ? ilocal[i] : i; 1778 ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr); 1779 } 1780 ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr); 1781 if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique"); 1782 ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr); 1783 PetscFunctionReturn(0); 1784 #else 1785 PetscFunctionBegin; 1786 PetscFunctionReturn(0); 1787 #endif 1788 } 1789 1790 /*@ 1791 PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view 1792 1793 Input Parameters: 1794 + sfA - The first PetscSF 1795 - sfB - The second PetscSF 1796 1797 Output Parameters: 1798 . sfBA - The composite SF 1799 1800 Level: developer 1801 1802 Notes: 1803 Currently, the two SFs must be defined on congruent communicators and they must be true star 1804 forests, i.e. the same leaf is not connected with different roots. 1805 1806 sfA's leaf space and sfB's root space might be partially overlapped. The composition builds 1807 a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected 1808 nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a 1809 Bcast on sfA, then a Bcast on sfB, on connected nodes. 
.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  /* In debug builds, verify both SFs are true star forests (no leaf connected to multiple roots) */
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  if (localPointsA) {
    /* Re-index sfA's remote points by sfA leaf number (== sfB root number); slots with no
       incoming edge keep rank = -1, which acts as the "unconnected" sentinel below */
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* sfA leaf outside sfB's root space cannot be on a path */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  /* Broadcast each sfB root's (rank,index) pair -- i.e. the sfA root it chains back to -- down to
     sfB's leaves; MPIU_2INT matches the two-integer layout of PetscSFNode */
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* denseB: sfB's leaves are contiguous (localPointsB == NULL) AND every one of them received a
     connected (rank != -1) remote point; then leafdataB can be reused verbatim */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    /* Hand leafdataB itself to the new SF; ownership transfers via PETSC_OWN_POINTER below */
    localPointsBA  = NULL;
    remotePointsBA = leafdataB;
  } else {
    /* Compress out unconnected leaves into freshly allocated arrays */
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

   Input Parameters:
+  sfA - The first PetscSF
-  sfB - The second PetscSF

   Output Parameters:
.  sfBA - The composite SF.

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
   a Reduce on sfB, on connected roots.
.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  /* In debug builds, verify both SFs are true star forests (no leaf connected to multiple roots) */
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);
  /* TODO: Check roots of sfB have degree of 1 */

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);
  /* Re-index sfA's remote points into the (dense) leaf range of sfB; rank = -1 marks slots with
     no sfA edge, the "unconnected" sentinel used after the reduction below */
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; /* outside sfB's leaf range: no path possible */
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* localPointsA == NULL means sfA's leaves are contiguous 0..numLeavesA-1 */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Pull each sfA remote point up from sfB's leaves to sfB's roots; MPIU_2INT matches the
     two-integer layout of PetscSFNode.
     Once we implement the TODO above (check all roots of sfB have degree of 1), we can replace the MPI_MAXLOC
     with MPIU_REPLACE. In that case, MPI_MAXLOC and MPIU_REPLACE have the same effect.
   */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,MPI_MAXLOC);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,MPI_MAXLOC);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* Compact connected roots (rank != -1) in place, recording each one's sfB root index as its leaf */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
.  sf - The global PetscSF

  Output Parameters:
.
out - The local PetscSF 1976 */ 1977 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out) 1978 { 1979 MPI_Comm comm; 1980 PetscMPIInt myrank; 1981 const PetscInt *ilocal; 1982 const PetscSFNode *iremote; 1983 PetscInt i,j,nroots,nleaves,lnleaves,*lilocal; 1984 PetscSFNode *liremote; 1985 PetscSF lsf; 1986 PetscErrorCode ierr; 1987 1988 PetscFunctionBegin; 1989 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1990 if (sf->ops->CreateLocalSF) { 1991 ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr); 1992 } else { 1993 /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */ 1994 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1995 ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr); 1996 1997 /* Find out local edges and build a local SF */ 1998 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 1999 for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;} 2000 ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr); 2001 ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr); 2002 2003 for (i=j=0; i<nleaves; i++) { 2004 if (iremote[i].rank == (PetscInt)myrank) { 2005 lilocal[j] = ilocal? 
ilocal[i] : i; /* ilocal=NULL for contiguous storage */ 2006 liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */ 2007 liremote[j].index = iremote[i].index; 2008 j++; 2009 } 2010 } 2011 ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr); 2012 ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 2013 ierr = PetscSFSetUp(lsf);CHKERRQ(ierr); 2014 *out = lsf; 2015 } 2016 PetscFunctionReturn(0); 2017 } 2018 2019 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */ 2020 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata) 2021 { 2022 PetscErrorCode ierr; 2023 PetscMemType rootmtype,leafmtype; 2024 2025 PetscFunctionBegin; 2026 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 2027 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 2028 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 2029 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 2030 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 2031 if (sf->ops->BcastToZero) { 2032 ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr); 2033 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type"); 2034 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 2035 PetscFunctionReturn(0); 2036 } 2037 2038