#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/hashseti.h>
#include <petscctable.h>

#if defined(PETSC_USE_DEBUG)
/* Debug-only guard: error out if the SF graph has not been set before use */
#  define PetscSFCheckGraphSet(sf,arg) do {                                                                             \
    if (PetscUnlikely(!(sf)->graphset))                                                                                 \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif

/* Option strings matching the PetscSFDuplicateOption enum, used by options processing/viewers */
const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0};

/*@
   PetscSFCreate - create a star forest communication context

   Collective

   Input Arguments:
.  comm - communicator on which the star forest will operate

   Output Arguments:
.  sf - new star forest context

   Options Database Keys:
+  -sf_type basic    - Use MPI persistent Isend/Irecv for communication (Default)
.  -sf_type window   - Use MPI-3 one-sided window for communication
-  -sf_type neighbor - Use MPI-3 neighborhood collectives for communication

   Level: intermediate

   Notes:
   When one knows the communication graph is one of the predefined graphs, such as MPI_Alltoall, MPI_Allgatherv,
   MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
   SFs are optimized and they have better performance than general SFs.

.seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
{
  PetscErrorCode ierr;
  PetscSF        b;

  PetscFunctionBegin;
  PetscValidPointer(sf,2);
  ierr = PetscSFInitializePackage();CHKERRQ(ierr);

  ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);

  /* Sentinel values marking "graph not set yet": negative counts and an inverted
     (empty) leaf range so that min/max updates in PetscSFSetGraph() work directly */
  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_MAX_INT;
  b->maxleaf   = PETSC_MIN_INT;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;

  *sf = b;
  PetscFunctionReturn(0);
}

/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Arguments:
.  sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the implementation release its own data first, then clear the generic part */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  /* mine/remote may alias mine_alloc/remote_alloc (owned) or user memory (not owned);
     only the _alloc pointers are freed here */
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* Device-side copy of rmine, if it was ever created */
  if (sf->rmine_d) {cudaError_t err = cudaFree(sf->rmine_d);CHKERRCUDA(err);sf->rmine_d=NULL;}
#endif
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFSetType - Set the PetscSF communication implementation

   Collective on PetscSF

   Input Parameters:
+  sf - the PetscSF context
-  type - a known method

   Options Database Key:
.  -sf_type <type> - Sets the method; use -help for a list
   of available methods (for instance, window, pt2pt, neighbor)

   Notes:
   See "include/petscsf.h" for available methods (for instance)
+    PETSCSFWINDOW - MPI-2/3 one-sided
-    PETSCSFBASIC - basic implementation using MPI-1 two-sided

   Level: intermediate

.seealso: PetscSFType, PetscSFCreate()
@*/
PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
{
  PetscErrorCode ierr,(*r)(PetscSF);
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidCharPointer(type,2);

  ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
  if (match) PetscFunctionReturn(0);

  ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
  if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
  /* Destroy the previous PetscSF implementation context */
  if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
  /* Clear the old vtable before the new type's constructor fills it in */
  ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
  ierr = (*r)(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetType - Get the PetscSF communication implementation

   Not Collective

   Input Parameter:
.  sf  - the PetscSF context

   Output Parameter:
.  type - the PetscSF type name

   Level: intermediate

.seealso: PetscSFSetType(), PetscSFCreate()
@*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
  PetscValidPointer(type,2);
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}

/*@
   PetscSFDestroy - destroy star forest

   Collective

   Input Arguments:
.  sf - address of star forest

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFReset()
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Only the last reference actually destroys the object */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Debug-mode validation that every leaf's (rank,index) pair and local location are
   in range; compiles to a no-op in optimized builds */
static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
{
#if defined(PETSC_USE_DEBUG)
  PetscInt          i, nleaves;
  PetscMPIInt       size;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  if (!sf->graphset) PetscFunctionReturn(0);
  ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  for (i = 0; i < nleaves; i++) {
    const PetscInt rank   = iremote[i].rank;
    const PetscInt remote = iremote[i].index;
    /* NULL ilocal means contiguous leaf storage, i.e. leaf i lives at location i */
    const PetscInt leaf   = ilocal ? ilocal[i] : i;
    if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
    if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
    if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
  }
  PetscFunctionReturn(0);
#else
  PetscFunctionBegin;
  PetscFunctionReturn(0);
#endif
}

/*@
   PetscSFSetUp - set up communication structures

   Collective

   Input Arguments:
.  sf - star forest communication object

   Level: beginner

.seealso: PetscSFSetFromOptions(), PetscSFSetType()
@*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (sf->setupcalled) PetscFunctionReturn(0);
  ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr);
  /* Default to the basic (two-sided) implementation if no type was chosen */
  if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);}
  ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
  ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetFromOptions - set PetscSF options using the options database

   Logically Collective

   Input Arguments:
.  sf - star forest

   Options Database Keys:
+  -sf_type - implementation type, see PetscSFSetType()
-  -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise

   Level: intermediate

.seealso: PetscSFWindowSetSyncType()
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
  /* PetscOptionsObject is declared by the PetscObjectOptionsBegin() macro above */
  if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

   Logically Collective

   Input Arguments:
+  sf - star forest
-  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

   Level: advanced

.seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  /* The multi-SF bakes in the ordering at creation, so it is too late to change it */
  if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Arguments:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
during setup in debug mode)
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
during setup in debug mode)
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
   In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode

   Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
   encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
   needed

   Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
   indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).

.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
  if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

  /* Discard any previously set graph before installing the new one */
  ierr = PetscSFReset(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  if (nleaves && ilocal) {
    PetscInt i;
    PetscInt minleaf = PETSC_MAX_INT;
    PetscInt maxleaf = PETSC_MIN_INT;
    int      contiguous = 1;
    /* One pass: compute the leaf range and detect the identity permutation */
    for (i=0; i<nleaves; i++) {
      minleaf = PetscMin(minleaf,ilocal[i]);
      maxleaf = PetscMax(maxleaf,ilocal[i]);
      contiguous &= (ilocal[i] == i);
    }
    sf->minleaf = minleaf;
    sf->maxleaf = maxleaf;
    if (contiguous) {
      /* Identity ilocal encodes contiguous storage; drop it (and free it if we own it) */
      if (localmode == PETSC_OWN_POINTER) {
        ierr = PetscFree(ilocal);CHKERRQ(ierr);
      }
      ilocal = NULL;
    }
  } else {
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
  }

  if (ilocal) {
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
      sf->mine = sf->mine_alloc;
      break;
    case PETSC_OWN_POINTER:
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      /* mine aliases user memory; mine_alloc stays NULL so PetscSFReset() will not free it */
      sf->mine_alloc = NULL;
      sf->mine       = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }

  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    sf->remote_alloc = NULL;
    sf->remote       = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

   Collective

   Input Parameters:
+  sf      - The PetscSF
.  map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
-  pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

   Notes:
   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
   n and N are local and global sizes of x respectively.

   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
   sequential vectors y on all ranks.

   With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
   sequential vector y on rank 0.

   In above cases, entries of x are roots and entries of y are leaves.

   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
   creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
   of rank j. Here 0 <= i,j < NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
   not mean one cannot send multiple items. One just needs to create a new MPI datatype for the multiple data
   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

   In this case, roots and leaves are symmetric.

   Level: intermediate
 @*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* Build a trivial layout: size roots per rank, size*size roots globally */
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it: MAX of (n,-n)
       gives (max n, -min n); they agree iff all local sizes are equal */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots  = n;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves in a gather */
    sf->nleaves = rank ? 0 : N;
    sf->nroots  = n;
    sf->nranks  = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves = size;
    sf->nroots  = size;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

   Collective

   Input Arguments:

.  sf - star forest to invert

   Output Arguments:
.  isf - inverse of sf
   Level: advanced

   Notes:
   All roots must have degree 1.

   The local space may be a permutation, but cannot be sparse.

.seealso: PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  /* Each leaf advertises its own (rank,local index) ... */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* ... and roots start out unclaimed (-1,-1) */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  /* After the reduce, each root holds the (rank,index) of the leaf pointing at it */
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL;
  else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  /* newilocal ownership transfers to the new SF (PETSC_OWN_POINTER) */
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Arguments:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Arguments:
.  newsf - new communication object

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      /* Patterned graphs are cheap to re-derive from the layout and pattern */
      ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
  /* Let the implementation copy any type-specific state */
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGraph - Get the graph specifying a parallel star forest

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers
-  iremote - remote locations of root vertices for each leaf on the current process

   Notes:
   We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet

   When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you
   want to update the graph, you must call PetscSFSetGraph after modifying the iremote array.

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Patterned SFs synthesize the graph on demand through this hook */
  if (sf->ops->GetGraph) {
    ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
  } else {
    if (nroots) *nroots = sf->nroots;
    if (nleaves) *nleaves = sf->nleaves;
    if (ilocal) *ilocal = sf->mine;
    if (iremote) *iremote = sf->remote;
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetLeafRange - Get the active leaf ranges

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
-  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.

   Level: developer

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
{

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFViewFromOptions - View from Options

   Collective on PetscSF

   Input Parameters:
+  A - the star forest
.  obj - Optional object
-  name - command line option

   Level: intermediate
.seealso:  PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
  ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFView - view a star forest

   Collective

   Input Arguments:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        /* NULL sf->mine means contiguous leaf locations (leaf i at location i) */
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        /* Print per-rank edge lists in sorted rank order (sort a scratch copy) */
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
.  roffset - offset in rmine/rremote for each rank (length nranks+1)
.  rmine - concatenated array holding local indices referencing each remote rank
-  rremote - concatenated array holding remote indices referenced for each remote rank

   Level: developer

.seealso: PetscSFGetLeafRanks()
@*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Rank data is built during setup, so it must have run first */
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetRootRanks) {
    ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
  } else {
    /* The generic implementation */
    if (nranks)  *nranks  = sf->nranks;
    if (ranks)   *ranks   = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine)   *rmine   = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  niranks - number of leaf ranks referencing roots on this process
.  iranks - array of ranks
.  ioffset - offset in irootloc for each rank (length niranks+1)
-  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetLeafRanks) {
    ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
  } else {
    /* No generic fallback: only some implementations track leaf ranks */
    PetscSFType type;
    ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
  }
  PetscFunctionReturn(0);
}

/* Linear-membership test: is needle one of the n entries of list? */
static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
  PetscInt i;
  for (i=0; i<n; i++) {
    if (needle == list[i]) return PETSC_TRUE;
  }
  return PETSC_FALSE;
}

/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.
888 889 Collective 890 891 Input Arguments: 892 + sf - PetscSF to set up; PetscSFSetGraph() must have been called 893 - dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange) 894 895 Level: developer 896 897 .seealso: PetscSFGetRootRanks() 898 @*/ 899 PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup) 900 { 901 PetscErrorCode ierr; 902 PetscTable table; 903 PetscTablePosition pos; 904 PetscMPIInt size,groupsize,*groupranks; 905 PetscInt *rcount,*ranks; 906 PetscInt i, irank = -1,orank = -1; 907 908 PetscFunctionBegin; 909 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 910 PetscSFCheckGraphSet(sf,1); 911 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr); 912 ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr); 913 for (i=0; i<sf->nleaves; i++) { 914 /* Log 1-based rank */ 915 ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr); 916 } 917 ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr); 918 ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr); 919 ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr); 920 ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr); 921 for (i=0; i<sf->nranks; i++) { 922 ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr); 923 ranks[i]--; /* Convert back to 0-based */ 924 } 925 ierr = PetscTableDestroy(&table);CHKERRQ(ierr); 926 927 /* We expect that dgroup is reliably "small" while nranks could be large */ 928 { 929 MPI_Group group = MPI_GROUP_NULL; 930 PetscMPIInt *dgroupranks; 931 ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr); 932 ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr); 933 ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr); 934 ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr); 935 for (i=0; i<groupsize; i++) dgroupranks[i] = i; 936 if (groupsize) 
{ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);} 937 ierr = MPI_Group_free(&group);CHKERRQ(ierr); 938 ierr = PetscFree(dgroupranks);CHKERRQ(ierr); 939 } 940 941 /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */ 942 for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i; ) { 943 for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */ 944 if (InList(ranks[i],groupsize,groupranks)) break; 945 } 946 for ( ; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */ 947 if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break; 948 } 949 if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */ 950 PetscInt tmprank,tmpcount; 951 952 tmprank = ranks[i]; 953 tmpcount = rcount[i]; 954 ranks[i] = ranks[sf->ndranks]; 955 rcount[i] = rcount[sf->ndranks]; 956 ranks[sf->ndranks] = tmprank; 957 rcount[sf->ndranks] = tmpcount; 958 sf->ndranks++; 959 } 960 } 961 ierr = PetscFree(groupranks);CHKERRQ(ierr); 962 ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr); 963 ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr); 964 sf->roffset[0] = 0; 965 for (i=0; i<sf->nranks; i++) { 966 ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr); 967 sf->roffset[i+1] = sf->roffset[i] + rcount[i]; 968 rcount[i] = 0; 969 } 970 for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) { 971 /* short circuit */ 972 if (orank != sf->remote[i].rank) { 973 /* Search for index of iremote[i].rank in sf->ranks */ 974 ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr); 975 if (irank < 0) { 976 ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr); 977 if (irank >= 0) irank += sf->ndranks; 978 } 979 orank = sf->remote[i].rank; 980 } 981 if (irank < 0) 
SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank); 982 sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i; 983 sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index; 984 rcount[irank]++; 985 } 986 ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr); 987 PetscFunctionReturn(0); 988 } 989 990 /*@C 991 PetscSFGetGroups - gets incoming and outgoing process groups 992 993 Collective 994 995 Input Argument: 996 . sf - star forest 997 998 Output Arguments: 999 + incoming - group of origin processes for incoming edges (leaves that reference my roots) 1000 - outgoing - group of destination processes for outgoing edges (roots that I reference) 1001 1002 Level: developer 1003 1004 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow() 1005 @*/ 1006 PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing) 1007 { 1008 PetscErrorCode ierr; 1009 MPI_Group group = MPI_GROUP_NULL; 1010 1011 PetscFunctionBegin; 1012 if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining groups"); 1013 if (sf->ingroup == MPI_GROUP_NULL) { 1014 PetscInt i; 1015 const PetscInt *indegree; 1016 PetscMPIInt rank,*outranks,*inranks; 1017 PetscSFNode *remote; 1018 PetscSF bgcount; 1019 1020 /* Compute the number of incoming ranks */ 1021 ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr); 1022 for (i=0; i<sf->nranks; i++) { 1023 remote[i].rank = sf->ranks[i]; 1024 remote[i].index = 0; 1025 } 1026 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr); 1027 ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1028 ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr); 1029 ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr); 1030 1031 /* Enumerate the incoming ranks */ 1032 ierr = 
PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr); 1033 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 1034 for (i=0; i<sf->nranks; i++) outranks[i] = rank; 1035 ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr); 1036 ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr); 1037 ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr); 1038 ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr); 1039 ierr = MPI_Group_free(&group);CHKERRQ(ierr); 1040 ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr); 1041 ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr); 1042 } 1043 *incoming = sf->ingroup; 1044 1045 if (sf->outgroup == MPI_GROUP_NULL) { 1046 ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr); 1047 ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr); 1048 ierr = MPI_Group_free(&group);CHKERRQ(ierr); 1049 } 1050 *outgoing = sf->outgroup; 1051 PetscFunctionReturn(0); 1052 } 1053 1054 /*@ 1055 PetscSFGetMultiSF - gets the inner SF implemeting gathers and scatters 1056 1057 Collective 1058 1059 Input Argument: 1060 . sf - star forest that may contain roots with 0 or with more than 1 vertex 1061 1062 Output Arguments: 1063 . multi - star forest with split roots, such that each root has degree exactly 1 1064 1065 Level: developer 1066 1067 Notes: 1068 1069 In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi 1070 directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming 1071 edge, it is a candidate for future optimization that might involve its removal. 
1072 1073 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering() 1074 @*/ 1075 PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi) 1076 { 1077 PetscErrorCode ierr; 1078 1079 PetscFunctionBegin; 1080 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1081 PetscValidPointer(multi,2); 1082 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */ 1083 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1084 *multi = sf->multi; 1085 PetscFunctionReturn(0); 1086 } 1087 if (!sf->multi) { 1088 const PetscInt *indegree; 1089 PetscInt i,*inoffset,*outones,*outoffset,maxlocal; 1090 PetscSFNode *remote; 1091 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 1092 ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr); 1093 ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr); 1094 ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr); 1095 inoffset[0] = 0; 1096 for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i]; 1097 for (i=0; i<maxlocal; i++) outones[i] = 1; 1098 ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1099 ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1100 for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1101 #if 0 1102 #if defined(PETSC_USE_DEBUG) /* Check that the expected number of increments occurred */ 1103 for (i=0; i<sf->nroots; i++) { 1104 if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp"); 1105 } 1106 #endif 1107 #endif 1108 ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr); 1109 for (i=0; i<sf->nleaves; i++) { 1110 remote[i].rank = sf->remote[i].rank; 1111 remote[i].index = outoffset[sf->mine ? 
sf->mine[i] : i]; 1112 } 1113 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1114 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1115 if (sf->rankorder) { /* Sort the ranks */ 1116 PetscMPIInt rank; 1117 PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree; 1118 PetscSFNode *newremote; 1119 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 1120 for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]); 1121 ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr); 1122 for (i=0; i<maxlocal; i++) outranks[i] = rank; 1123 ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr); 1124 ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr); 1125 /* Sort the incoming ranks at each vertex, build the inverse map */ 1126 for (i=0; i<sf->nroots; i++) { 1127 PetscInt j; 1128 for (j=0; j<indegree[i]; j++) tmpoffset[j] = j; 1129 ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr); 1130 for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1131 } 1132 ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr); 1133 ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr); 1134 ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr); 1135 for (i=0; i<sf->nleaves; i++) { 1136 newremote[i].rank = sf->remote[i].rank; 1137 newremote[i].index = newoutoffset[sf->mine ? 
sf->mine[i] : i]; 1138 } 1139 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1140 ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr); 1141 } 1142 ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr); 1143 } 1144 *multi = sf->multi; 1145 PetscFunctionReturn(0); 1146 } 1147 1148 /*@C 1149 PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices 1150 1151 Collective 1152 1153 Input Arguments: 1154 + sf - original star forest 1155 . nselected - number of selected roots on this process 1156 - selected - indices of the selected roots on this process 1157 1158 Output Arguments: 1159 . newsf - new star forest 1160 1161 Level: advanced 1162 1163 Note: 1164 To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can 1165 be done by calling PetscSFGetGraph(). 1166 1167 .seealso: PetscSFSetGraph(), PetscSFGetGraph() 1168 @*/ 1169 PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1170 { 1171 PetscInt i,n,*roots,*rootdata,*leafdata,nroots,nleaves,connected_leaves,*new_ilocal; 1172 const PetscSFNode *iremote; 1173 PetscSFNode *new_iremote; 1174 PetscSF tmpsf; 1175 MPI_Comm comm; 1176 PetscErrorCode ierr; 1177 1178 PetscFunctionBegin; 1179 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1180 PetscSFCheckGraphSet(sf,1); 1181 if (nselected) PetscValidPointer(selected,3); 1182 PetscValidPointer(newsf,4); 1183 1184 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1185 1186 /* Uniq selected[] and put results in roots[] */ 1187 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1188 ierr = PetscMalloc1(nselected,&roots);CHKERRQ(ierr); 1189 ierr = PetscArraycpy(roots,selected,nselected);CHKERRQ(ierr); 1190 ierr = PetscSortedRemoveDupsInt(&nselected,roots);CHKERRQ(ierr); 1191 if (nselected && (roots[0] < 0 
|| roots[nselected-1] >= sf->nroots)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %D/%D are not in [0,%D)",roots[0],roots[nselected-1],sf->nroots); 1192 1193 if (sf->ops->CreateEmbeddedSF) { 1194 ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,roots,newsf);CHKERRQ(ierr); 1195 } else { 1196 /* A generic version of creating embedded sf. Note that we called PetscSFSetGraph() twice, which is certainly expensive */ 1197 /* Find out which leaves (not leaf data items) are still connected to roots in the embedded sf */ 1198 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,NULL,&iremote);CHKERRQ(ierr); 1199 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&tmpsf);CHKERRQ(ierr); 1200 ierr = PetscSFSetGraph(tmpsf,nroots,nleaves,NULL/*contiguous*/,PETSC_USE_POINTER,iremote,PETSC_USE_POINTER);CHKERRQ(ierr); 1201 ierr = PetscCalloc2(nroots,&rootdata,nleaves,&leafdata);CHKERRQ(ierr); 1202 for (i=0; i<nselected; ++i) rootdata[roots[i]] = 1; 1203 ierr = PetscSFBcastBegin(tmpsf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr); 1204 ierr = PetscSFBcastEnd(tmpsf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr); 1205 ierr = PetscSFDestroy(&tmpsf);CHKERRQ(ierr); 1206 1207 /* Build newsf with leaves that are still connected */ 1208 connected_leaves = 0; 1209 for (i=0; i<nleaves; ++i) connected_leaves += leafdata[i]; 1210 ierr = PetscMalloc1(connected_leaves,&new_ilocal);CHKERRQ(ierr); 1211 ierr = PetscMalloc1(connected_leaves,&new_iremote);CHKERRQ(ierr); 1212 for (i=0, n=0; i<nleaves; ++i) { 1213 if (leafdata[i]) { 1214 new_ilocal[n] = sf->mine ? 
sf->mine[i] : i; 1215 new_iremote[n].rank = sf->remote[i].rank; 1216 new_iremote[n].index = sf->remote[i].index; 1217 ++n; 1218 } 1219 } 1220 1221 if (n != connected_leaves) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"There is a size mismatch in the SF embedding, %d != %d",n,connected_leaves); 1222 ierr = PetscSFCreate(comm,newsf);CHKERRQ(ierr); 1223 ierr = PetscSFSetFromOptions(*newsf);CHKERRQ(ierr); 1224 ierr = PetscSFSetGraph(*newsf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1225 ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr); 1226 } 1227 ierr = PetscFree(roots);CHKERRQ(ierr); 1228 PetscFunctionReturn(0); 1229 } 1230 1231 /*@C 1232 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1233 1234 Collective 1235 1236 Input Arguments: 1237 + sf - original star forest 1238 . nselected - number of selected leaves on this process 1239 - selected - indices of the selected leaves on this process 1240 1241 Output Arguments: 1242 . 
newsf - new star forest 1243 1244 Level: advanced 1245 1246 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph() 1247 @*/ 1248 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1249 { 1250 const PetscSFNode *iremote; 1251 PetscSFNode *new_iremote; 1252 const PetscInt *ilocal; 1253 PetscInt i,nroots,*leaves,*new_ilocal; 1254 MPI_Comm comm; 1255 PetscErrorCode ierr; 1256 1257 PetscFunctionBegin; 1258 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1259 PetscSFCheckGraphSet(sf,1); 1260 if (nselected) PetscValidPointer(selected,3); 1261 PetscValidPointer(newsf,4); 1262 1263 /* Uniq selected[] and put results in leaves[] */ 1264 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1265 ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr); 1266 ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr); 1267 ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr); 1268 if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves); 1269 1270 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1271 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) { 1272 ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr); 1273 } else { 1274 ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr); 1275 ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr); 1276 ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr); 1277 for (i=0; i<nselected; ++i) { 1278 const PetscInt l = leaves[i]; 1279 new_ilocal[i] = ilocal ? 
ilocal[l] : l; 1280 new_iremote[i].rank = iremote[l].rank; 1281 new_iremote[i].index = iremote[l].index; 1282 } 1283 ierr = PetscSFCreate(comm,newsf);CHKERRQ(ierr); 1284 ierr = PetscSFSetFromOptions(*newsf);CHKERRQ(ierr); 1285 ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1286 } 1287 ierr = PetscFree(leaves);CHKERRQ(ierr); 1288 PetscFunctionReturn(0); 1289 } 1290 1291 /*@C 1292 PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd() 1293 1294 Collective on PetscSF 1295 1296 Input Arguments: 1297 + sf - star forest on which to communicate 1298 . unit - data type associated with each node 1299 . rootdata - buffer to broadcast 1300 - op - operation to use for reduction 1301 1302 Output Arguments: 1303 . leafdata - buffer to be reduced with values from each leaf's respective root 1304 1305 Level: intermediate 1306 1307 .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd() 1308 @*/ 1309 PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1310 { 1311 PetscErrorCode ierr; 1312 PetscMemType rootmtype,leafmtype; 1313 1314 PetscFunctionBegin; 1315 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1316 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1317 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1318 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1319 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1320 #if defined(PETSC_HAVE_CUDA) 1321 /* Shall we assume rootdata, leafdata are ready to use (instead of being computed by some asynchronous kernels)? 1322 To be similar to MPI, I'd like to have this assumption, since MPI does not have a concept of stream. 1323 But currently this assumption is not enforecd in Petsc. To be safe, I do synchronization here. 
Otherwise, if 1324 we do not sync now and call the Pack kernel directly on the default NULL stream (assume petsc objects are also 1325 computed on it), we have to sync the NULL stream before calling MPI routines. So, it looks a cudaDeviceSynchronize 1326 is inevitable. We do it now and put pack/unpack kernels to non-NULL streams. 1327 */ 1328 if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);} 1329 #endif 1330 ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 1331 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1332 PetscFunctionReturn(0); 1333 } 1334 1335 /*@C 1336 PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin() 1337 1338 Collective 1339 1340 Input Arguments: 1341 + sf - star forest 1342 . unit - data type 1343 . rootdata - buffer to broadcast 1344 - op - operation to use for reduction 1345 1346 Output Arguments: 1347 . 
leafdata - buffer to be reduced with values from each leaf's respective root 1348 1349 Level: intermediate 1350 1351 .seealso: PetscSFSetGraph(), PetscSFReduceEnd() 1352 @*/ 1353 PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1354 { 1355 PetscErrorCode ierr; 1356 PetscMemType rootmtype,leafmtype; 1357 1358 PetscFunctionBegin; 1359 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1360 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1361 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1362 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1363 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1364 ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 1365 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1366 PetscFunctionReturn(0); 1367 } 1368 1369 /*@C 1370 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 1371 1372 Collective 1373 1374 Input Arguments: 1375 + sf - star forest 1376 . unit - data type 1377 . leafdata - values to reduce 1378 - op - reduction operation 1379 1380 Output Arguments: 1381 . 
rootdata - result of reduction of values from all leaves of each root 1382 1383 Level: intermediate 1384 1385 .seealso: PetscSFBcastBegin() 1386 @*/ 1387 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1388 { 1389 PetscErrorCode ierr; 1390 PetscMemType rootmtype,leafmtype; 1391 1392 PetscFunctionBegin; 1393 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1394 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1395 ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1396 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1397 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1398 #if defined(PETSC_HAVE_CUDA) 1399 if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);} 1400 #endif 1401 ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr); 1402 ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1403 PetscFunctionReturn(0); 1404 } 1405 1406 /*@C 1407 PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin() 1408 1409 Collective 1410 1411 Input Arguments: 1412 + sf - star forest 1413 . unit - data type 1414 . leafdata - values to reduce 1415 - op - reduction operation 1416 1417 Output Arguments: 1418 . 
rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
  /* Memory types were already probed in PetscSFReduceBegin(); probe again here so End is self-contained */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = (*sf->ops->ReduceEnd)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Arguments:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
   integers.
1462 1463 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph() 1464 @*/ 1465 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1466 { 1467 PetscErrorCode ierr; 1468 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1469 1470 PetscFunctionBegin; 1471 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1472 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1473 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1474 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1475 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1476 ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr); 1477 if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1478 #if defined(PETSC_HAVE_CUDA) 1479 if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);} 1480 #endif 1481 ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr); 1482 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1483 PetscFunctionReturn(0); 1484 } 1485 1486 /*@C 1487 PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value 1488 1489 Collective 1490 1491 Input Arguments: 1492 + sf - star forest 1493 . unit - data type 1494 . 
leafdata - leaf values to use in reduction 1495 - op - operation to use for reduction 1496 1497 Output Arguments: 1498 + rootdata - root values to be updated, input state is seen by first process to perform an update 1499 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1500 1501 Level: advanced 1502 1503 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph() 1504 @*/ 1505 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1506 { 1507 PetscErrorCode ierr; 1508 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1509 1510 PetscFunctionBegin; 1511 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1512 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1513 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1514 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1515 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1516 ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr); 1517 if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1518 ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr); 1519 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1520 PetscFunctionReturn(0); 1521 } 1522 1523 /*@C 1524 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd() 1525 1526 Collective 1527 1528 Input Arguments: 1529 . sf - star forest 1530 1531 Output Arguments: 1532 . degree - degree of each root vertex 1533 1534 Level: advanced 1535 1536 Notes: 1537 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 
1538 1539 .seealso: PetscSFGatherBegin() 1540 @*/ 1541 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree) 1542 { 1543 PetscErrorCode ierr; 1544 1545 PetscFunctionBegin; 1546 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1547 PetscSFCheckGraphSet(sf,1); 1548 PetscValidPointer(degree,2); 1549 if (!sf->degreeknown) { 1550 PetscInt i, nroots = sf->nroots, maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 1551 if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1552 ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr); 1553 ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1554 for (i=0; i<nroots; i++) sf->degree[i] = 0; 1555 for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1; 1556 ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr); 1557 } 1558 *degree = NULL; 1559 PetscFunctionReturn(0); 1560 } 1561 1562 /*@C 1563 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin() 1564 1565 Collective 1566 1567 Input Arguments: 1568 . sf - star forest 1569 1570 Output Arguments: 1571 . degree - degree of each root vertex 1572 1573 Level: developer 1574 1575 Notes: 1576 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 
.seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    /* degreetmp is allocated by PetscSFComputeDegreeBegin(); if it is missing, Begin was never called */
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    sf->degreeknown = PETSC_TRUE; /* cache: later calls return sf->degree without communication */
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
   Each multi-root is assigned index of the corresponding original root.

   Collective

   Input Arguments:
+  sf - star forest
-  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

   Output Arguments:
+  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
-  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

   Level: developer

   Notes:
   The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.

.seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
@*/
PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
{
  PetscSF        msf;
  PetscInt       i, j, k, nroots, nmroots;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
  if (nroots) PetscValidIntPointer(degree,2);
  if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
  PetscValidPointer(multiRootsOrigNumbering,4);
  ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr);
  /* Each original root i occupies degree[i] consecutive multi-root slots; record i in each of them */
  for (i=0,j=0,k=0; i<nroots; i++) {
    if (!degree[i]) continue;
    for (j=0; j<degree[i]; j++,k++) {
      (*multiRootsOrigNumbering)[k] = i;
    }
  }
#if defined(PETSC_USE_DEBUG)
  /* the sum of degrees must equal the number of multi-roots */
  if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
#endif
  if (nMultiRoots) *nMultiRoots = nmroots;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Argument:
.  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Gather is a Reduce over the multi-SF: each leaf has its own multi-root slot, so MPIU_REPLACE is collision-free */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Argument:
.  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.
leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Scatter is a Bcast over the multi-SF: each multi-root sends its one unit to its single leaf */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFGatherEnd()
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Debug-only sanity check that the SF is a true star forest: insert every leaf index into a
   hash set and compare the set size with the leaf count; a mismatch means some leaf index is
   connected to more than one root. Compiles to a no-op in optimized builds. */
static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
{
#if defined(PETSC_USE_DEBUG)
  PetscInt        i, n, nleaves;
  const PetscInt *ilocal = NULL;
  PetscHSetI      seen;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  ierr = PetscHSetICreate(&seen);CHKERRQ(ierr);
  for (i = 0; i < nleaves; i++) {
    /* ilocal == NULL means leaves are stored contiguously, i.e. leaf i has index i */
    const PetscInt leaf = ilocal ? ilocal[i] : i;
    ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
  }
  ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
  if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
  ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
  PetscFunctionReturn(0);
#else
  PetscFunctionBegin;
  PetscFunctionReturn(0);
#endif
}

/*@
   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

   Input Parameters:
+  sfA - The first PetscSF
-  sfB - The second PetscSF

   Output Parameters:
.  sfBA - The composite SF

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e. the same leaf is not connected with different roots.

   sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
   Bcast on sfA, then a Bcast on sfB, on connected nodes.
.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  if (localPointsA) {
    /* sfA's leaves are not contiguous: build a buffer indexed by sfB's root numbering, where slot r
       holds sfA's remote root reached from leaf r (== root r of sfB), or (-1,-1) when unconnected */
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* leaf of sfA is outside sfB's root space: no path */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  /* leafdataB is addressed by absolute leaf index; shift by minleaf so only [minleaf,maxleaf] is stored */
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  /* Push each sfB root's upstream sfA-root id (a (rank,index) pair, hence MPIU_2INT) down to sfB's leaves */
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* The result keeps sfB's contiguous leaf layout only if sfB is dense (NULL ilocal) and every
     leaf received a connected root; rank == -1 marks an unconnected leaf */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership of leafdataB passes to the new SF below */
  } else {
    /* compact: keep only connected leaves, remembering their original indices */
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

   Input Parameters:
+  sfA - The first PetscSF
-  sfB - The second PetscSF

   Output Parameters:
.  sfBA - The composite SF.

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
   a Reduce on sfB, on connected roots.
.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);
  /* TODO: Check roots of sfB have degree of 1 */

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  /* Stage sfA's remote roots into a buffer indexed by sfB's leaf range [minleaf,maxleaf];
     (-1,-1) marks positions with no sfA connection */
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; /* sfA leaf outside sfB's leaf range */
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* NULL ilocal: sfA leaf i has index i */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Once we implement the TODO above (check all roots of sfB have degree of 1), we can replace the MPI_MAXLOC
     with MPIU_REPLACE. In that case, MPI_MAXLOC and MPIU_REPLACE have the same effect.
   */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,MPI_MAXLOC);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,MPI_MAXLOC);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* Compact in place: keep only sfB roots that received a connected sfA root (rank != -1),
     recording each surviving root's original index as the new leaf index */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
.
out - The local PetscSF
 */
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm          comm;
  PetscMPIInt       myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt          i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode       *liremote;
  PetscSF           lsf;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->CreateLocalSF) {
    /* implementation-provided fast path */
    ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
  } else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr);

    /* Find out local edges and build a local SF */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
    /* first pass: count edges whose remote root lives on this rank */
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);

    /* second pass: copy the intra-process edges, renumbering the remote rank to 0 */
    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
    *out = lsf;
  }
  PetscFunctionReturn(0);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* determine whether the buffers live in host or device memory so the backend can pick the right path */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  if (sf->ops->BcastToZero) {
    ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  /* NOTE(review): both the Begin and End log calls use the PETSCSF_BcastAndOpBegin event -- confirm intended */
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}