#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/hashseti.h>
#include <petscctable.h>

/* In debug builds, error out if the SF graph has not been set before use;
   compiles to a no-op in optimized builds. */
#if defined(PETSC_USE_DEBUG)
#  define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                              \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif

/* String names for PetscSFDuplicateOption, in enum order, for options processing */
const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0};

/*@
   PetscSFCreate - create a star forest communication context

   Collective

   Input Arguments:
.  comm - communicator on which the star forest will operate

   Output Arguments:
.  sf - new star forest context

   Options Database Keys:
+  -sf_type basic    - Use MPI persistent Isend/Irecv for communication (Default)
.  -sf_type window   - Use MPI-3 one-sided window for communication
-  -sf_type neighbor - Use MPI-3 neighborhood collectives for communication

   Level: intermediate

   Notes:
   When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
   MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
   SFs are optimized and they have better performance than general SFs.

.seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
{
  PetscErrorCode ierr;
  PetscSF        b;

  PetscFunctionBegin;
  PetscValidPointer(sf,2);
  ierr = PetscSFInitializePackage();CHKERRQ(ierr);

  ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);

  /* Sentinels: graph not set yet (-1 counts) and an empty leaf range (min > max) */
  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_MAX_INT;
  b->maxleaf   = PETSC_MIN_INT;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;

  *sf = b;
  PetscFunctionReturn(0);
}

/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Arguments:
.  sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Give the implementation a chance to release its own data first */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  {
    /* Free device-side copies of rmine, if any were allocated */
    PetscInt i;
    for (i=0; i<2; i++) {if (sf->rmine_d[i]) {cudaError_t err = cudaFree(sf->rmine_d[i]);CHKERRCUDA(err);sf->rmine_d[i]=NULL;}}
  }
#endif
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFSetType - Set the PetscSF communication implementation

   Collective on PetscSF

   Input Parameters:
+  sf - the PetscSF context
-  type - a known method

   Options Database Key:
.  -sf_type <type> - Sets the method; use -help for a list
   of available methods (for instance, window, basic, neighbor)

   Notes:
   See "include/petscsf.h" for available methods (for instance)
+    PETSCSFWINDOW - MPI-2/3 one-sided
-    PETSCSFBASIC - basic implementation using MPI-1 two-sided

   Level: intermediate

.seealso: PetscSFType, PetscSFCreate()
@*/
PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
{
  PetscErrorCode ierr,(*r)(PetscSF);
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidCharPointer(type,2);

  /* No-op when the requested type is already set */
  ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
  if (match) PetscFunctionReturn(0);

  ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
  if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
  /* Destroy the previous PetscSF implementation context */
  if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
  ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
  ierr = (*r)(sf);CHKERRQ(ierr);   /* run the new type's constructor */
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetType - Get the PetscSF communication implementation

   Not Collective

   Input Parameter:
.  sf - the PetscSF context

   Output Parameter:
.  type - the PetscSF type name

   Level: intermediate

.seealso: PetscSFSetType(), PetscSFCreate()
@*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
  PetscValidPointer(type,2);
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}

/*@
   PetscSFDestroy - destroy star forest

   Collective

   Input Arguments:
.  sf - address of star forest

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFReset()
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Only the last reference actually destroys the object */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Debug-only validation that every leaf references a rank in [0,size) and
   nonnegative root/leaf indices; a no-op in optimized builds. */
static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
{
#if defined(PETSC_USE_DEBUG)
  PetscInt          i, nleaves;
  PetscMPIInt       size;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  if (!sf->graphset) PetscFunctionReturn(0);
  ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  for (i = 0; i < nleaves; i++) {
    const PetscInt rank   = iremote[i].rank;
    const PetscInt remote = iremote[i].index;
    const PetscInt leaf   = ilocal ? ilocal[i] : i;  /* NULL ilocal means contiguous leaves */
    if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
    if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
    if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
  }
  PetscFunctionReturn(0);
#else
  PetscFunctionBegin;
  PetscFunctionReturn(0);
#endif
}

/*@
   PetscSFSetUp - set up communication structures

   Collective

   Input Arguments:
.  sf - star forest communication object

   Level: beginner

.seealso: PetscSFSetFromOptions(), PetscSFSetType()
@*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (sf->setupcalled) PetscFunctionReturn(0);   /* idempotent */
  ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr);
  /* Fall back to the default type when the user has not chosen one */
  if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);}
  ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
  ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetFromOptions - set PetscSF options using the options database

   Logically Collective

   Input Arguments:
.  sf - star forest

   Options Database Keys:
+  -sf_type               - implementation type, see PetscSFSetType()
.  -sf_rank_order         - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
-  -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also
                            use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller.

   Level: intermediate
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
  /* Default to not assuming the default stream; the option below overrides it */
  sf->use_default_stream = PETSC_FALSE;
  ierr = PetscOptionsBool("-sf_use_default_stream","Assume SF's input and output root/leafdata is computed on the default stream","PetscSFSetFromOptions",sf->use_default_stream,&sf->use_default_stream,NULL);CHKERRQ(ierr);
  /* PetscOptionsObject is declared by the PetscObjectOptionsBegin() macro above */
  if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

   Logically Collective

   Input Arguments:
+  sf - star forest
-  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

   Level: advanced

.seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  /* sf->multi is created lazily by gather/scatter; once it exists the order is baked in */
  if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Arguments:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
            during setup in debug mode)
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
             during setup in debug mode)
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
   In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode

   Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
   encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
   needed

   Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
   indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).

.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
  if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    ierr = PetscSFReset(sf);CHKERRQ(ierr);
  }

  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  if (nleaves && ilocal) {
    /* Compute the leaf range and detect the identity permutation, which is
       stored as ilocal == NULL (contiguous leaves) */
    PetscInt i;
    PetscInt minleaf = PETSC_MAX_INT;
    PetscInt maxleaf = PETSC_MIN_INT;
    int      contiguous = 1;
    for (i=0; i<nleaves; i++) {
      minleaf = PetscMin(minleaf,ilocal[i]);
      maxleaf = PetscMax(maxleaf,ilocal[i]);
      contiguous &= (ilocal[i] == i);
    }
    sf->minleaf = minleaf;
    sf->maxleaf = maxleaf;
    if (contiguous) {
      if (localmode == PETSC_OWN_POINTER) {
        /* We own the array but will not keep it: release it now */
        ierr = PetscFree(ilocal);CHKERRQ(ierr);
      }
      ilocal = NULL;
    }
  } else {
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
  }

  if (ilocal) {
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
      sf->mine = sf->mine_alloc;
      break;
    case PETSC_OWN_POINTER:
      sf->mine_alloc = (PetscInt*)ilocal;   /* take ownership; freed in PetscSFReset() */
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      sf->mine_alloc = NULL;                /* caller retains ownership */
      sf->mine       = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }

  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;   /* take ownership; freed in PetscSFReset() */
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    sf->remote_alloc = NULL;                    /* caller retains ownership */
    sf->remote       = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

   Collective

   Input Parameters:
+  sf - The PetscSF
.  map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
-  pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

   Notes:
   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
   n and N are local and global sizes of x respectively.

   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
   sequential vectors y on all ranks.

   With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
   sequential vector y on rank 0.

   In above cases, entries of x are roots and entries of y are leaves.

   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
   creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
   of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
   not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data
   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

   In this case, roots and leaves are symmetric.

   Level: intermediate
@*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* map is ignored for alltoall: build a layout with `size` roots per rank */
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr   = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr   = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots  = n;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves in a gather */
    sf->nleaves = rank ? 0 : N;
    sf->nroots  = n;
    sf->nranks  = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves = size;
    sf->nroots  = size;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

   Collective

   Input Arguments:
.  sf - star forest to invert

   Output Arguments:
.  isf - inverse of sf

   Level: advanced

   Notes:
   All roots must have degree 1.

   The local space may be a permutation, but cannot be sparse.

.seealso: PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  /* Each leaf advertises its own (rank,index); reducing this onto the roots
     tells every root where its (single, degree-1) leaf lives */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;   /* marks a root that receives no leaf */
    roots[i].index = -1;
  }
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL;   /* dense: contiguous leaf numbering suffices */
  else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  /* newilocal ownership transfers to the new SF (PETSC_OWN_POINTER) */
  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Arguments:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Arguments:
.  newsf - new communication object

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      /* Patterned graphs are rebuilt from the pattern rather than copied */
      ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGraph - Get the graph specifying a parallel star forest

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers
-  iremote - remote locations of root vertices for each leaf on the current process

   Notes:
   We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet

   When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you
   want to update the graph, you must call PetscSFSetGraph after modifying the iremote array.

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->GetGraph) {
    /* Implementations (e.g. patterned SFs) may synthesize the graph on demand */
    ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
  } else {
    if (nroots)  *nroots  = sf->nroots;
    if (nleaves) *nleaves = sf->nleaves;
    if (ilocal)  *ilocal  = sf->mine;
    if (iremote) *iremote = sf->remote;
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetLeafRange - Get the active leaf ranges

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  minleaf - minimum active leaf on this process.
Return 0 if there are no leaves.
-  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.

   Level: developer

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFViewFromOptions - View from Options

   Collective on PetscSF

   Input Parameters:
+  A - the star forest
.  obj - Optional object
-  name - command line option

   Level: intermediate

.seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
  ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFView - view a star forest

   Collective

   Input Arguments:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}   /* rank data below requires setup */
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        /* Print edges grouped by remote rank, in sorted rank order */
        PetscMPIInt *tmpranks,*perm;
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
.  roffset - offset in rmine/rremote for each rank (length nranks+1)
.
rmine - concatenated array holding local indices referencing each remote rank
-  rremote - concatenated array holding remote indices referenced for each remote rank

   Level: developer

.seealso: PetscSFGetLeafRanks()
@*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Rank data is built during setup; returning stale/NULL arrays would be misleading */
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetRootRanks) {
    ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
  } else {
    /* The generic implementation */
    if (nranks)  *nranks  = sf->nranks;
    if (ranks)   *ranks   = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine)   *rmine   = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  niranks - number of leaf ranks referencing roots on this process
.  iranks - array of ranks
.  ioffset - offset in irootloc for each rank (length niranks+1)
-  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetLeafRanks) {
    ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
  } else {
    /* No generic fallback: leaf-rank data exists only for types that build it */
    PetscSFType type;
    ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
  }
  PetscFunctionReturn(0);
}

/* Return PETSC_TRUE iff needle occurs in list[0..n-1]; linear scan */
static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
  PetscInt i;
  for (i=0; i<n; i++) {
    if (needle == list[i]) return PETSC_TRUE;
  }
  return PETSC_FALSE;
}

/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

   Collective

   Input Arguments:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  /* Count how many leaves reference each remote rank, using a hash table keyed by rank */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank (PetscTable cannot store key 0) */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--; /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group   group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate the distinguished group's ranks into the SF communicator's numbering */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i; ) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for ( ; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort the two partitions independently so each is in ascending rank order */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* Build CSR-style offsets into rmine/rremote, one contiguous span per remote rank */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0; /* reused below as a per-rank fill cursor */
  }
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit: leaves referencing the same rank as the previous leaf reuse irank */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks (distinguished span first, then the rest) */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Argument:
.  sf - star forest

   Output Arguments:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks: build an auxiliary SF with one root and one leaf
       per remote rank, so the root degree equals the number of processes referencing me */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
    /* Enumerate the incoming ranks: each referencing process gathers its own rank to my root */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* Outgoing group is simply the ranks I reference, already collected in sf->ranks */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Argument:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Arguments:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.
1078 1079 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering() 1080 @*/ 1081 PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi) 1082 { 1083 PetscErrorCode ierr; 1084 1085 PetscFunctionBegin; 1086 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1087 PetscValidPointer(multi,2); 1088 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */ 1089 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1090 *multi = sf->multi; 1091 PetscFunctionReturn(0); 1092 } 1093 if (!sf->multi) { 1094 const PetscInt *indegree; 1095 PetscInt i,*inoffset,*outones,*outoffset,maxlocal; 1096 PetscSFNode *remote; 1097 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 1098 ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr); 1099 ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr); 1100 ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr); 1101 inoffset[0] = 0; 1102 for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i]; 1103 for (i=0; i<maxlocal; i++) outones[i] = 1; 1104 ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1105 ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1106 for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1107 #if defined(PETSC_USE_DEBUG) /* Check that the expected number of increments occurred */ 1108 for (i=0; i<sf->nroots; i++) { 1109 if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp"); 1110 } 1111 #endif 1112 ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr); 1113 for (i=0; i<sf->nleaves; i++) { 1114 remote[i].rank = sf->remote[i].rank; 1115 remote[i].index = outoffset[sf->mine ? 
sf->mine[i] : i]; 1116 } 1117 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1118 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1119 if (sf->rankorder) { /* Sort the ranks */ 1120 PetscMPIInt rank; 1121 PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree; 1122 PetscSFNode *newremote; 1123 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 1124 for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]); 1125 ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr); 1126 for (i=0; i<maxlocal; i++) outranks[i] = rank; 1127 ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr); 1128 ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr); 1129 /* Sort the incoming ranks at each vertex, build the inverse map */ 1130 for (i=0; i<sf->nroots; i++) { 1131 PetscInt j; 1132 for (j=0; j<indegree[i]; j++) tmpoffset[j] = j; 1133 ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr); 1134 for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1135 } 1136 ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr); 1137 ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr); 1138 ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr); 1139 for (i=0; i<sf->nleaves; i++) { 1140 newremote[i].rank = sf->remote[i].rank; 1141 newremote[i].index = newoutoffset[sf->mine ? 
sf->mine[i] : i]; 1142 } 1143 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1144 ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr); 1145 } 1146 ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr); 1147 } 1148 *multi = sf->multi; 1149 PetscFunctionReturn(0); 1150 } 1151 1152 /*@C 1153 PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices 1154 1155 Collective 1156 1157 Input Arguments: 1158 + sf - original star forest 1159 . nselected - number of selected roots on this process 1160 - selected - indices of the selected roots on this process 1161 1162 Output Arguments: 1163 . esf - new star forest 1164 1165 Level: advanced 1166 1167 Note: 1168 To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can 1169 be done by calling PetscSFGetGraph(). 1170 1171 .seealso: PetscSFSetGraph(), PetscSFGetGraph() 1172 @*/ 1173 PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf) 1174 { 1175 PetscInt i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal; 1176 const PetscInt *ilocal; 1177 signed char *rootdata,*leafdata,*leafmem; 1178 const PetscSFNode *iremote; 1179 PetscSFNode *new_iremote; 1180 MPI_Comm comm; 1181 PetscErrorCode ierr; 1182 1183 PetscFunctionBegin; 1184 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1185 PetscSFCheckGraphSet(sf,1); 1186 if (nselected) PetscValidPointer(selected,3); 1187 PetscValidPointer(esf,4); 1188 1189 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1190 1191 ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1192 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1193 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 1194 1195 #if defined(PETSC_USE_DEBUG) 1196 /* Error out if selected[] has dups or out 
of range indices */ 1197 { 1198 PetscBool dups; 1199 ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr); 1200 if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups"); 1201 for (i=0; i<nselected; i++) 1202 if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %D is out of [0,%D)",selected[i],nroots); 1203 } 1204 #endif 1205 1206 if (sf->ops->CreateEmbeddedSF) { 1207 ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,selected,esf);CHKERRQ(ierr); 1208 } else { 1209 /* A generic version of creating embedded sf */ 1210 ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 1211 maxlocal = maxleaf - minleaf + 1; 1212 ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr); 1213 leafdata = leafmem - minleaf; 1214 /* Tag selected roots and bcast to leaves */ 1215 for (i=0; i<nselected; i++) rootdata[selected[i]] = 1; 1216 ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr); 1217 ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr); 1218 1219 /* Build esf with leaves that are still connected */ 1220 esf_nleaves = 0; 1221 for (i=0; i<nleaves; i++) { 1222 j = ilocal ? ilocal[i] : i; 1223 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1224 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1225 */ 1226 esf_nleaves += (leafdata[j] ? 1 : 0); 1227 } 1228 ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr); 1229 ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr); 1230 for (i=n=0; i<nleaves; i++) { 1231 j = ilocal ? 
ilocal[i] : i; 1232 if (leafdata[j]) { 1233 new_ilocal[n] = j; 1234 new_iremote[n].rank = iremote[i].rank; 1235 new_iremote[n].index = iremote[i].index; 1236 ++n; 1237 } 1238 } 1239 ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr); 1240 ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr); 1241 ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1242 ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr); 1243 } 1244 ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1245 PetscFunctionReturn(0); 1246 } 1247 1248 /*@C 1249 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1250 1251 Collective 1252 1253 Input Arguments: 1254 + sf - original star forest 1255 . nselected - number of selected leaves on this process 1256 - selected - indices of the selected leaves on this process 1257 1258 Output Arguments: 1259 . newsf - new star forest 1260 1261 Level: advanced 1262 1263 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph() 1264 @*/ 1265 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1266 { 1267 const PetscSFNode *iremote; 1268 PetscSFNode *new_iremote; 1269 const PetscInt *ilocal; 1270 PetscInt i,nroots,*leaves,*new_ilocal; 1271 MPI_Comm comm; 1272 PetscErrorCode ierr; 1273 1274 PetscFunctionBegin; 1275 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1276 PetscSFCheckGraphSet(sf,1); 1277 if (nselected) PetscValidPointer(selected,3); 1278 PetscValidPointer(newsf,4); 1279 1280 /* Uniq selected[] and put results in leaves[] */ 1281 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1282 ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr); 1283 ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr); 1284 ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr); 1285 if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) 
SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves); 1286 1287 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1288 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) { 1289 ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr); 1290 } else { 1291 ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr); 1292 ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr); 1293 ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr); 1294 for (i=0; i<nselected; ++i) { 1295 const PetscInt l = leaves[i]; 1296 new_ilocal[i] = ilocal ? ilocal[l] : l; 1297 new_iremote[i].rank = iremote[l].rank; 1298 new_iremote[i].index = iremote[l].index; 1299 } 1300 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr); 1301 ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1302 } 1303 ierr = PetscFree(leaves);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 /*@C 1308 PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd() 1309 1310 Collective on PetscSF 1311 1312 Input Arguments: 1313 + sf - star forest on which to communicate 1314 . unit - data type associated with each node 1315 . rootdata - buffer to broadcast 1316 - op - operation to use for reduction 1317 1318 Output Arguments: 1319 . 
leafdata - buffer to be reduced with values from each leaf's respective root 1320 1321 Level: intermediate 1322 1323 .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd() 1324 @*/ 1325 PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1326 { 1327 PetscErrorCode ierr; 1328 PetscMemType rootmtype,leafmtype; 1329 1330 PetscFunctionBegin; 1331 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1332 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1333 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1334 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1335 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1336 ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 1337 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1338 PetscFunctionReturn(0); 1339 } 1340 1341 /*@C 1342 PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin() 1343 1344 Collective 1345 1346 Input Arguments: 1347 + sf - star forest 1348 . unit - data type 1349 . rootdata - buffer to broadcast 1350 - op - operation to use for reduction 1351 1352 Output Arguments: 1353 . 
leafdata - buffer to be reduced with values from each leaf's respective root 1354 1355 Level: intermediate 1356 1357 .seealso: PetscSFSetGraph(), PetscSFReduceEnd() 1358 @*/ 1359 PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1360 { 1361 PetscErrorCode ierr; 1362 1363 PetscFunctionBegin; 1364 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1365 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1366 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1367 ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr); 1368 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1369 PetscFunctionReturn(0); 1370 } 1371 1372 /*@C 1373 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 1374 1375 Collective 1376 1377 Input Arguments: 1378 + sf - star forest 1379 . unit - data type 1380 . leafdata - values to reduce 1381 - op - reduction operation 1382 1383 Output Arguments: 1384 . 
rootdata - result of reduction of values from all leaves of each root 1385 1386 Level: intermediate 1387 1388 .seealso: PetscSFBcastBegin() 1389 @*/ 1390 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1391 { 1392 PetscErrorCode ierr; 1393 PetscMemType rootmtype,leafmtype; 1394 1395 PetscFunctionBegin; 1396 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1397 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1398 ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1399 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1400 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1401 ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr); 1402 ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1403 PetscFunctionReturn(0); 1404 } 1405 1406 /*@C 1407 PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin() 1408 1409 Collective 1410 1411 Input Arguments: 1412 + sf - star forest 1413 . unit - data type 1414 . leafdata - values to reduce 1415 - op - reduction operation 1416 1417 Output Arguments: 1418 . 
rootdata - result of reduction of values from all leaves of each root 1419 1420 Level: intermediate 1421 1422 .seealso: PetscSFSetGraph(), PetscSFBcastEnd() 1423 @*/ 1424 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1425 { 1426 PetscErrorCode ierr; 1427 1428 PetscFunctionBegin; 1429 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1430 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1431 ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr); 1432 ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr); 1433 ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr); 1434 PetscFunctionReturn(0); 1435 } 1436 1437 /*@C 1438 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd() 1439 1440 Collective 1441 1442 Input Arguments: 1443 + sf - star forest 1444 . unit - data type 1445 . leafdata - leaf values to use in reduction 1446 - op - operation to use for reduction 1447 1448 Output Arguments: 1449 + rootdata - root values to be updated, input state is seen by first process to perform an update 1450 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1451 1452 Level: advanced 1453 1454 Note: 1455 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1456 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1457 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1458 integers. 
1459 1460 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph() 1461 @*/ 1462 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1463 { 1464 PetscErrorCode ierr; 1465 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1466 1467 PetscFunctionBegin; 1468 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1469 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1470 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1471 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1472 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1473 ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr); 1474 if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1475 ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr); 1476 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1477 PetscFunctionReturn(0); 1478 } 1479 1480 /*@C 1481 PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value 1482 1483 Collective 1484 1485 Input Arguments: 1486 + sf - star forest 1487 . unit - data type 1488 . 
leafdata - leaf values to use in reduction 1489 - op - operation to use for reduction 1490 1491 Output Arguments: 1492 + rootdata - root values to be updated, input state is seen by first process to perform an update 1493 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1494 1495 Level: advanced 1496 1497 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph() 1498 @*/ 1499 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1500 { 1501 PetscErrorCode ierr; 1502 1503 PetscFunctionBegin; 1504 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1505 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1506 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1507 ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr); 1508 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1509 PetscFunctionReturn(0); 1510 } 1511 1512 /*@C 1513 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd() 1514 1515 Collective 1516 1517 Input Arguments: 1518 . sf - star forest 1519 1520 Output Arguments: 1521 . degree - degree of each root vertex 1522 1523 Level: advanced 1524 1525 Notes: 1526 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 

.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf-sf->minleaf+1;
    ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* Reduce a 1 from every leaf: each root accumulates its incoming-edge count */
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  /* Result is not available until PetscSFComputeDegreeEnd() */
  *degree = NULL;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFComputeDegreeBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    sf->degreeknown = PETSC_TRUE; /* cached; later calls return sf->degree without communication */
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}


/*@C
   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
   Each multi-root is assigned index of the corresponding original root.

   Collective

   Input Arguments:
+  sf - star forest
-  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

   Output Arguments:
+  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
-  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

   Level: developer

   Notes:
   The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.

.seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
@*/
PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
{
  PetscSF        msf;
  PetscInt       i, j, k, nroots, nmroots;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
  if (nroots) PetscValidIntPointer(degree,2);
  if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
  PetscValidPointer(multiRootsOrigNumbering,4);
  ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr);
  /* Expand each original root i into degree[i] consecutive multi-root slots, all labeled i */
  for (i=0,j=0,k=0; i<nroots; i++) {
    if (!degree[i]) continue;
    for (j=0; j<degree[i]; j++,k++) {
      (*multiRootsOrigNumbering)[k] = i;
    }
  }
#if defined(PETSC_USE_DEBUG)
  if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
#endif
  if (nMultiRoots) *nMultiRoots = nmroots;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Argument:
.
multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Gather is implemented as a replace-reduction on the degree-1 multi-SF */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Argument:
.  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Scatter is implemented as a broadcast on the degree-1 multi-SF */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Debug-only sanity check: error out if two leaves of sf share the same local index.
   In optimized builds this is a no-op. */
static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
{
#if defined(PETSC_USE_DEBUG)
  PetscInt        i, n, nleaves;
  const PetscInt *ilocal = NULL;
  PetscHSetI      seen;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  ierr = PetscHSetICreate(&seen);CHKERRQ(ierr);
  for (i = 0; i < nleaves; i++) {
    const PetscInt leaf = ilocal ? ilocal[i] : i;
    ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
  }
  /* If the set is smaller than nleaves, some leaf index occurred more than once */
  ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
  if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
  ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
  PetscFunctionReturn(0);
#else
  PetscFunctionBegin;
  PetscFunctionReturn(0);
#endif
}

/*@
  PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots.

  sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
  nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
  Bcast on sfA, then a Bcast on sfB, on connected nodes.
.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  if (localPointsA) {
    /* sfA's leaves are non-contiguous: scatter remotePointsA into root-index order of sfB,
       marking slots with no incoming sfA leaf as rank/index -1 (i.e. "unconnected") */
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* sfA leaf beyond sfB's root space: no path, skip */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    /* From here on remotePointsA aliases the reordered copy; the original array is untouched */
    remotePointsA = reorderedRemotePointsA;
  }
  /* Broadcast sfA's root locations down sfB; leafdataB is indexed by (leaf - minleaf),
     hence the shifted pointer so sfB's absolute leaf indices land in-bounds */
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* denseB: composed leaves are exactly 0..numLeavesB-1, so ilocal can stay NULL.
     Starts true only if sfB itself is dense; cleared if any leaf lacks a root connection */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    /* All leaves connected and contiguous: hand leafdataB itself to the new SF */
    localPointsBA  = NULL;
    remotePointsBA = leafdataB;
  } else {
    /* Compact: keep only leaves whose broadcast value points at a real sfA root */
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  /* Ownership of localPointsBA/remotePointsBA (possibly == leafdataB) passes to sfBA */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

   Input Parameters:
+  sfA - The first PetscSF
-  sfB - The second PetscSF

   Output Parameters:
.  sfBA - The composite SF.

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
   a Reduce on sfB, on connected roots.
.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op            op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool         iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPIU_REPLACE. In that case, MPI_MAXLOC and MPIU_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr);
  if (iswin) op = MPIU_REPLACE;
#endif

  /* Lay out sfA's root locations over sfB's leaf range; (-1,-1) marks leaves with no sfA edge */
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; /* outside sfB's leaf range: no path */
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* localPointsA == NULL: sfA's leaves are the contiguous range 0..numLeavesA-1 */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  /* Reduce (leaf -> root) over sfB pushes each sfA root location up to the sfB root it inverts to */
  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Shifted pointer: sfB addresses leaves by absolute index, buffer starts at minleaf */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* In-place compaction: keep only sfB roots that received a real sfA root (rank != -1);
     safe because the write index never passes the read index */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  /* Ownership of both arrays passes to sfBA */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

   Input Parameters:
.  sf - The global PetscSF

   Output Parameters:
.  out - The local PetscSF
*/
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm          comm;
  PetscMPIInt       myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt          i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode       *liremote;
  PetscSF           lsf;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->CreateLocalSF) {
    /* Type-specific implementation takes precedence */
    ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
  } else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr);

    /* Find out local edges and build a local SF */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
    /* First pass: count edges whose remote root lives on this rank */
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);

    /* Second pass: copy the intra-process edges, remapping the rank to 0 (self) */
    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
    *out = lsf;
  }
  PetscFunctionReturn(0);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Detect whether the buffers live on host or device so the implementation can pick the right path */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  if (sf->ops->BcastToZero) {
    ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  /* NOTE(review): both begin and end log the same event since the whole operation is synchronous here */
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}