#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/hashseti.h>
#include <petscctable.h>

/* In debug builds, error out if the SF's graph has not been set before an operation that requires it. */
#if defined(PETSC_USE_DEBUG)
#  define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                              \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif

/* String names paired with the PetscSFDuplicateOption enum, used by option/enum printing helpers */
const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0};

/*@
   PetscSFCreate - create a star forest communication context

   Collective

   Input Arguments:
.  comm - communicator on which the star forest will operate

   Output Arguments:
.  sf - new star forest context

   Options Database Keys:
+  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
.  -sf_type window    -Use MPI-3 one-sided window for communication
-  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication

   Level: intermediate

   Notes:
   When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
   MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
   SFs are optimized and they have better performance than general SFs.

.seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
{
  PetscErrorCode ierr;
  PetscSF        b;

  PetscFunctionBegin;
  PetscValidPointer(sf,2);
  ierr = PetscSFInitializePackage();CHKERRQ(ierr);

  ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);

  /* Sentinel values meaning "graph not set yet"; min/max leaf bounds start inverted so the
     first PetscSFSetGraph() computes them correctly with PetscMin/PetscMax */
  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_MAX_INT;
  b->maxleaf   = PETSC_MIN_INT;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;

  *sf = b;
  PetscFunctionReturn(0);
}

/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Arguments:
.  sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Give the type-specific implementation a chance to release its own data first */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  /* mine/remote above may alias mine_alloc/remote_alloc; only the _alloc pointers are owned and freed */
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  if (sf->rmine_d) {cudaError_t err = cudaFree(sf->rmine_d);CHKERRCUDA(err);sf->rmine_d=NULL;}
#endif
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFSetType - Set the PetscSF communication implementation

   Collective on PetscSF

   Input Parameters:
+  sf - the PetscSF context
-  type - a known method

   Options Database Key:
.  -sf_type <type> - Sets the method; use -help for a list
   of available methods (for instance, window, basic, neighbor)

   Notes:
   See "include/petscsf.h" for available methods (for instance)
+    PETSCSFWINDOW - MPI-2/3 one-sided
-    PETSCSFBASIC - basic implementation using MPI-1 two-sided

  Level: intermediate

.seealso: PetscSFType, PetscSFCreate()
@*/
PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
{
  PetscErrorCode ierr,(*r)(PetscSF);
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidCharPointer(type,2);

  /* No-op if the SF already has the requested type */
  ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
  if (match) PetscFunctionReturn(0);

  ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
  if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
  /* Destroy the previous PetscSF implementation context */
  if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
  ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
  /* Call the type's constructor, which fills in sf->ops */
  ierr = (*r)(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
  PetscSFGetType - Get the PetscSF communication implementation

  Not Collective

  Input Parameter:
. sf - the PetscSF context

  Output Parameter:
. type - the PetscSF type name

  Level: intermediate

.seealso: PetscSFSetType(), PetscSFCreate()
@*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
  PetscValidPointer(type,2);
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}

/*@
   PetscSFDestroy - destroy star forest

   Collective

   Input Arguments:
.  sf - address of star forest

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFReset()
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Standard PETSc reference counting: only the last reference triggers actual destruction */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Debug-only validation that every leaf's remote rank, remote index, and local index are in range.
   Compiles to a no-op in optimized builds. */
static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
{
#if defined(PETSC_USE_DEBUG)
  PetscInt           i, nleaves;
  PetscMPIInt        size;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  if (!sf->graphset) PetscFunctionReturn(0);
  ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  for (i = 0; i < nleaves; i++) {
    const PetscInt rank   = iremote[i].rank;
    const PetscInt remote = iremote[i].index;
    const PetscInt leaf   = ilocal ? ilocal[i] : i; /* NULL ilocal means contiguous leaves 0..nleaves-1 */
    if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
    if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
    if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
  }
  PetscFunctionReturn(0);
#else
  PetscFunctionBegin;
  PetscFunctionReturn(0);
#endif
}

/*@
   PetscSFSetUp - set up communication structures

   Collective

   Input Arguments:
.  sf - star forest communication object

   Level: beginner

.seealso: PetscSFSetFromOptions(), PetscSFSetType()
@*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (sf->setupcalled) PetscFunctionReturn(0);
  ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr);
  /* Default to the basic (two-sided MPI) implementation if no type was chosen */
  if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);}
  ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
  ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetFromOptions - set PetscSF options using the options database

   Logically Collective

   Input Arguments:
.  sf - star forest

   Options Database Keys:
+  -sf_type               - implementation type, see PetscSFSetType()
.  -sf_rank_order         - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
-  -sf_use_pinned_buffer  - use pinned (nonpagable) memory for send/recv buffers on host when communicating GPU data but GPU-aware MPI is not used.
                            Only available for SF types of basic and neighbor.

   Level: intermediate
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
  /* Preserve an already-set type as the default shown in -help */
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
  if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

   Logically Collective

   Input Arguments:
+  sf - star forest
-  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

   Level: advanced

.seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  /* sf->multi is created lazily by gather/scatter; once it exists the ordering is baked in */
  if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Arguments:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
during setup in debug mode)
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
during setup in debug mode)
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
    In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode

   Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
   encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
   needed

   Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
   indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).

.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
  if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    ierr = PetscSFReset(sf);CHKERRQ(ierr);
  }

  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  if (nleaves && ilocal) {
    /* Compute the leaf range and detect identity-permutation ilocal, which is equivalent to
       contiguous storage and is dropped (see Developers Note above) */
    PetscInt i;
    PetscInt minleaf = PETSC_MAX_INT;
    PetscInt maxleaf = PETSC_MIN_INT;
    int      contiguous = 1;
    for (i=0; i<nleaves; i++) {
      minleaf = PetscMin(minleaf,ilocal[i]);
      maxleaf = PetscMax(maxleaf,ilocal[i]);
      contiguous &= (ilocal[i] == i);
    }
    sf->minleaf = minleaf;
    sf->maxleaf = maxleaf;
    if (contiguous) {
      if (localmode == PETSC_OWN_POINTER) {
        ierr = PetscFree(ilocal);CHKERRQ(ierr);
      }
      ilocal = NULL;
    }
  } else {
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
  }

  if (ilocal) {
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
      sf->mine = sf->mine_alloc;
      break;
    case PETSC_OWN_POINTER:
      /* Take ownership of the caller's array; it will be freed in PetscSFReset() */
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      /* Borrow the caller's array; mine_alloc stays NULL so we never free it */
      sf->mine_alloc = NULL;
      sf->mine       = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }

  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    sf->remote_alloc = NULL;
    sf->remote       = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
  PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

  Collective

  Input Parameters:
+ sf      - The PetscSF
. map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
- pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

  Notes:
  It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
  n and N are local and global sizes of x respectively.

  With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
  sequential vectors y on all ranks.

  With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
  sequential vector y on rank 0.

  In above cases, entries of x are roots and entries of y are leaves.

  With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
  creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
  of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
  not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data
  items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

  In this case, roots and leaves are symmetric.

  Level: intermediate
@*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* For alltoall the user's map is ignored; build the implied layout of size ranks per process */
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    /* A single MPI_MAX on (n,-n) yields (max n, -min n); equality of the two means all n are equal */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots  = n;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves in a gather */
    sf->nleaves = rank ? 0 : N;
    sf->nroots  = n;
    sf->nranks  = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves = size;
    sf->nroots  = size;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

   Collective

   Input Arguments:

.  sf - star forest to invert

   Output Arguments:
.  isf - inverse of sf
   Level: advanced

   Notes:
   All roots must have degree 1.

   The local space may be a permutation, but cannot be sparse.

.seealso: PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  /* Each leaf advertises its own (rank,index); reducing over sf deposits that pair at its root,
     which gives exactly the edges of the inverse graph */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL;
  else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  /* newilocal ownership transfers to *isf (PETSC_OWN_POINTER); roots is copied and freed below */
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Arguments:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Arguments:
.  newsf - new communication object

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      /* General graphs are copied edge by edge; patterned graphs are recreated from the pattern */
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGraph - Get the graph specifying a parallel star forest

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers
-  iremote - remote locations of root vertices for each leaf on the current process

   Notes:
   We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet

   When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you
   want to update the graph, you must call PetscSFSetGraph after modifying the iremote array.

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->GetGraph) {
    /* Patterned implementations may synthesize the graph on demand */
    ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
  } else {
    if (nroots) *nroots = sf->nroots;
    if (nleaves) *nleaves = sf->nleaves;
    if (ilocal) *ilocal = sf->mine;
    if (iremote) *iremote = sf->remote;
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetLeafRange - Get the active leaf ranges

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
-  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.

   Level: developer

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
{

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFViewFromOptions - View from Options

   Collective on PetscSF

   Input Parameters:
+  A - the star forest
.  obj - Optional object
-  name - command line option

   Level: intermediate
.seealso:  PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
  ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFView - view a star forest

   Collective

   Input Arguments:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        /* Print edges grouped by (sorted) remote rank; sort a copy so the SF's own arrays are untouched */
        PetscMPIInt *tmpranks,*perm;
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
.  roffset - offset in rmine/rremote for each rank (length nranks+1)
.  rmine - concatenated array holding local indices referencing each remote rank
-  rremote - concatenated array holding remote indices referenced for each remote rank

   Level: developer

.seealso: PetscSFGetLeafRanks()
@*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetRootRanks) {
    ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
  } else {
    /* The generic implementation */
    if (nranks)  *nranks  = sf->nranks;
    if (ranks)   *ranks   = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine)   *rmine   = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  niranks - number of leaf ranks referencing roots on this process
.  iranks - array of ranks
.  ioffset - offset in irootloc for each rank (length niranks+1)
-  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetLeafRanks) {
    ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
  } else {
    /* Unlike PetscSFGetRootRanks() there is no generic fallback; the type must implement this */
    PetscSFType type;
    ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
  }
  PetscFunctionReturn(0);
}

/* Linear-search membership test: PETSC_TRUE iff needle appears in list[0..n) */
static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
  PetscInt i;
  for (i=0; i<n; i++) {
    if (needle == list[i]) return PETSC_TRUE;
  }
  return PETSC_FALSE;
}

/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

   Collective

   Input Arguments:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  /* Count, per remote rank, how many leaves reference it; the hash table discovers the set of ranks */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank (PetscTable cannot store key 0) */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--; /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate the distinguished group's ranks into the SF communicator's numbering */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished.
     Two-pointer scheme: i scans backward for a distinguished rank, sf->ndranks scans forward
     for a non-distinguished one; when both are found out of place, swap them (with their counts). */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i; ) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for ( ; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition by rank so later lookups can binary-search (see PetscFindMPIInt below) */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* Turn per-rank counts into CSR-style offsets; rcount[] is reset and reused as an insertion cursor */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;
  }
  /* Bucket every leaf into its rank's segment of rmine[]/rremote[] */
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit: consecutive leaves for the same remote rank reuse the cached irank */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks: distinguished partition first, then the rest */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Argument:
.  sf - star forest

   Output Arguments:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  /* Both groups are computed lazily and cached on the SF (freed in PetscSFReset()) */
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks: build a helper SF with one root and one leaf per remote rank */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
    /* Enumerate the incoming ranks: every process gathers the rank ids of those referencing it */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* Outgoing group is simply the ranks this process references; already available in sf->ranks */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Argument:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Arguments:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.

.seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
    ierr   = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    *multi = sf->multi;
    PetscFunctionReturn(0);
  }
  /* The multi-SF is built lazily and cached on the SF */
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    /* inoffset[] is the prefix sum of root degrees: where each root's slice of multi-roots starts */
    ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Each leaf atomically fetches its unique slot inside its root's slice; inoffset[] is advanced in place */
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
#if defined(PETSC_USE_DEBUG) /* Check that the expected number of increments occurred */
    for (i=0; i<sf->nroots; i++) {
      if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
    }
#endif
    /* Re-target every leaf at its private multi-root slot */
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) {        /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      /* Deliver each contributing leaf's rank to its multi-root slot */
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell each leaf its rank-sorted slot and rebuild the multi graph accordingly */
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
    }
    ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices

   Collective

   Input Arguments:
+  sf - original star forest
.  nselected  - number of selected roots on this process
-  selected   - indices of the selected roots on this process

   Output Arguments:
.  newsf - new star forest

   Level: advanced

   Note:
   To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
   be done by calling PetscSFGetGraph().

.seealso: PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  PetscInt          i,n,*roots,*rootdata,*leafdata,nroots,nleaves,connected_leaves,*new_ilocal;
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  PetscSF           tmpsf;
  MPI_Comm          comm;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (nselected) PetscValidPointer(selected,3);
  PetscValidPointer(newsf,4);

  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
  /* Uniq selected[] and put results in roots[] */
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscMalloc1(nselected,&roots);CHKERRQ(ierr);
  ierr = PetscArraycpy(roots,selected,nselected);CHKERRQ(ierr);
  /* NOTE(review): PetscSortedRemoveDupsInt() requires its input to already be sorted, and the
     range check below reads roots[0]/roots[nselected-1] as min/max -- confirm callers pass sorted selected[] */
  ierr = PetscSortedRemoveDupsInt(&nselected,roots);CHKERRQ(ierr);
  if (nselected && (roots[0] < 0 || roots[nselected-1] >= sf->nroots)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %D/%D are not in [0,%D)",roots[0],roots[nselected-1],sf->nroots);

  if (sf->ops->CreateEmbeddedSF) {
    ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,roots,newsf);CHKERRQ(ierr);
  } else {
    /* A generic version of creating embedded sf. Note that we called PetscSFSetGraph() twice, which is certainly expensive */
    /* Find out which leaves (not leaf data items) are still connected to roots in the embedded sf */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,NULL,&iremote);CHKERRQ(ierr);
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&tmpsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(tmpsf,nroots,nleaves,NULL/*contiguous*/,PETSC_USE_POINTER,iremote,PETSC_USE_POINTER);CHKERRQ(ierr);
    /* Mark selected roots with 1 and broadcast the marks to leaves */
    ierr = PetscCalloc2(nroots,&rootdata,nleaves,&leafdata);CHKERRQ(ierr);
    for (i=0; i<nselected; ++i) rootdata[roots[i]] = 1;
    ierr = PetscSFBcastBegin(tmpsf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(tmpsf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&tmpsf);CHKERRQ(ierr);

    /* Build newsf with leaves that are still connected */
    connected_leaves = 0;
    for (i=0; i<nleaves; ++i) connected_leaves += leafdata[i];
    ierr = PetscMalloc1(connected_leaves,&new_ilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(connected_leaves,&new_iremote);CHKERRQ(ierr);
    for (i=0, n=0; i<nleaves; ++i) {
      if (leafdata[i]) {
        new_ilocal[n]        = sf->mine ? sf->mine[i] : i;
        new_iremote[n].rank  = sf->remote[i].rank;
        new_iremote[n].index = sf->remote[i].index;
        ++n;
      }
    }

    if (n != connected_leaves) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"There is a size mismatch in the SF embedding, %d != %d",n,connected_leaves);
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(*newsf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
  }
  ierr = PetscFree(roots);CHKERRQ(ierr);
  ierr = PetscSFSetUp(*newsf);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices

   Collective

   Input Arguments:
+  sf - original star forest
.  nselected  - number of selected leaves on this process
-  selected   - indices of the selected leaves on this process

   Output Arguments:
.
newsf - new star forest

   Level: advanced

.seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  const PetscInt    *ilocal;
  PetscInt          i,nroots,*leaves,*new_ilocal;
  MPI_Comm          comm;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (nselected) PetscValidPointer(selected,3);
  PetscValidPointer(newsf,4);

  /* Uniq selected[] and put results in leaves[] */
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr);
  ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr);
  /* NOTE(review): PetscSortedRemoveDupsInt() requires sorted input, and the range check below
     treats leaves[0]/leaves[nselected-1] as min/max -- confirm callers pass sorted selected[] */
  ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr);
  if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);

  /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
  if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
    ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr);
  } else {
    /* Generic path: copy only the selected leaf edges into a fresh graph */
    ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
    ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
    for (i=0; i<nselected; ++i) {
      const PetscInt l     = leaves[i];
      new_ilocal[i]        = ilocal ? ilocal[l] : l;
      new_iremote[i].rank  = iremote[l].rank;
      new_iremote[i].index = iremote[l].index;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  }
  ierr = PetscFree(leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd()

   Collective on PetscSF

   Input Arguments:
+  sf - star forest on which to communicate
.  unit - data type associated with each node
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Arguments:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* Shall we assume rootdata, leafdata are ready to use (instead of being computed by some asynchronous kernels)?
     To be similar to MPI, I'd like to have this assumption, since MPI does not have a concept of stream.
     But currently this assumption is not enforced in Petsc. To be safe, I do synchronization here. Otherwise, if
     we do not sync now and call the Pack kernel directly on the default NULL stream (assume petsc objects are also
     computed on it), we have to sync the NULL stream before calling MPI routines. So, it looks a cudaDeviceSynchronize
     is inevitable. We do it now and put pack/unpack kernels to non-NULL streams.
   */
  if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);}
#endif
  ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Arguments:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFReduceEnd()
@*/
PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Arguments:
.
rootdata - result of reduction of values from all leaves of each root 1381 1382 Level: intermediate 1383 1384 .seealso: PetscSFBcastBegin() 1385 @*/ 1386 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1387 { 1388 PetscErrorCode ierr; 1389 PetscMemType rootmtype,leafmtype; 1390 1391 PetscFunctionBegin; 1392 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1393 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1394 ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1395 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1396 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1397 #if defined(PETSC_HAVE_CUDA) 1398 if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);} 1399 #endif 1400 ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr); 1401 ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1402 PetscFunctionReturn(0); 1403 } 1404 1405 /*@C 1406 PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin() 1407 1408 Collective 1409 1410 Input Arguments: 1411 + sf - star forest 1412 . unit - data type 1413 . leafdata - values to reduce 1414 - op - reduction operation 1415 1416 Output Arguments: 1417 . 
rootdata - result of reduction of values from all leaves of each root 1418 1419 Level: intermediate 1420 1421 .seealso: PetscSFSetGraph(), PetscSFBcastEnd() 1422 @*/ 1423 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1424 { 1425 PetscErrorCode ierr; 1426 PetscMemType rootmtype,leafmtype; 1427 1428 PetscFunctionBegin; 1429 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1430 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1431 ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr); 1432 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1433 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1434 ierr = (*sf->ops->ReduceEnd)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr); 1435 ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr); 1436 PetscFunctionReturn(0); 1437 } 1438 1439 /*@C 1440 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd() 1441 1442 Collective 1443 1444 Input Arguments: 1445 + sf - star forest 1446 . unit - data type 1447 . leafdata - leaf values to use in reduction 1448 - op - operation to use for reduction 1449 1450 Output Arguments: 1451 + rootdata - root values to be updated, input state is seen by first process to perform an update 1452 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1453 1454 Level: advanced 1455 1456 Note: 1457 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1458 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1459 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1460 integers. 
1461 1462 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph() 1463 @*/ 1464 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1465 { 1466 PetscErrorCode ierr; 1467 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1468 1469 PetscFunctionBegin; 1470 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1471 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1472 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1473 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1474 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1475 ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr); 1476 if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1477 #if defined(PETSC_HAVE_CUDA) 1478 if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);} 1479 #endif 1480 ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr); 1481 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } 1484 1485 /*@C 1486 PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value 1487 1488 Collective 1489 1490 Input Arguments: 1491 + sf - star forest 1492 . unit - data type 1493 . 
leafdata - leaf values to use in reduction 1494 - op - operation to use for reduction 1495 1496 Output Arguments: 1497 + rootdata - root values to be updated, input state is seen by first process to perform an update 1498 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1499 1500 Level: advanced 1501 1502 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph() 1503 @*/ 1504 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1505 { 1506 PetscErrorCode ierr; 1507 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1508 1509 PetscFunctionBegin; 1510 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1511 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1512 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1513 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1514 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1515 ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr); 1516 if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1517 ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr); 1518 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1519 PetscFunctionReturn(0); 1520 } 1521 1522 /*@C 1523 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd() 1524 1525 Collective 1526 1527 Input Arguments: 1528 . sf - star forest 1529 1530 Output Arguments: 1531 . degree - degree of each root vertex 1532 1533 Level: advanced 1534 1535 Notes: 1536 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 

.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  /* The degree array is computed once and cached; only the first Begin/End pair communicates */
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf-sf->minleaf+1;
    ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* Each leaf contributes 1 to its root; the sum over leaves is the root's degree.
       degreetmp is shifted by minleaf so the reduce can index leaves directly. */
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  /* The result is only valid after PetscSFComputeDegreeEnd() */
  *degree = NULL;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFComputeDegreeBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    sf->degreeknown = PETSC_TRUE;
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}


/*@C
   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
   Each multi-root is assigned index of the corresponding original root.

   Collective

   Input Arguments:
+  sf - star forest
-  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

   Output Arguments:
+  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
-  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

   Level: developer

   Notes:
   The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.

.seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
@*/
PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
{
  PetscSF        msf;
  PetscInt       i, j, k, nroots, nmroots;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
  if (nroots) PetscValidIntPointer(degree,2);
  if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
  PetscValidPointer(multiRootsOrigNumbering,4);
  ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr);
  /* Root i with degree d contributes d consecutive multi-roots, each labeled i */
  for (i=0,j=0,k=0; i<nroots; i++) {
    if (!degree[i]) continue;
    for (j=0; j<degree[i]; j++,k++) {
      (*multiRootsOrigNumbering)[k] = i;
    }
  }
#if defined(PETSC_USE_DEBUG)
  if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
#endif
  if (nMultiRoots) *nMultiRoots = nmroots;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Argument:
.
multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1660 1661 Level: intermediate 1662 1663 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1664 @*/ 1665 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1666 { 1667 PetscErrorCode ierr; 1668 PetscSF multi; 1669 1670 PetscFunctionBegin; 1671 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1672 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1673 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1674 ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 1675 PetscFunctionReturn(0); 1676 } 1677 1678 /*@C 1679 PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin() 1680 1681 Collective 1682 1683 Input Arguments: 1684 + sf - star forest 1685 . unit - data type 1686 - leafdata - leaf data to gather to roots 1687 1688 Output Argument: 1689 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1690 1691 Level: intermediate 1692 1693 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1694 @*/ 1695 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1696 { 1697 PetscErrorCode ierr; 1698 PetscSF multi; 1699 1700 PetscFunctionBegin; 1701 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1702 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1703 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1704 ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 1705 PetscFunctionReturn(0); 1706 } 1707 1708 /*@C 1709 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd() 1710 1711 Collective 1712 1713 Input Arguments: 1714 + sf - star forest 1715 . unit - data type 1716 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1717 1718 Output Argument: 1719 . 
leafdata - leaf data to be update with personal data from each respective root 1720 1721 Level: intermediate 1722 1723 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1724 @*/ 1725 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1726 { 1727 PetscErrorCode ierr; 1728 PetscSF multi; 1729 1730 PetscFunctionBegin; 1731 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1732 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1733 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1734 ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr); 1735 PetscFunctionReturn(0); 1736 } 1737 1738 /*@C 1739 PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin() 1740 1741 Collective 1742 1743 Input Arguments: 1744 + sf - star forest 1745 . unit - data type 1746 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1747 1748 Output Argument: 1749 . leafdata - leaf data to be update with personal data from each respective root 1750 1751 Level: intermediate 1752 1753 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1754 @*/ 1755 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1756 { 1757 PetscErrorCode ierr; 1758 PetscSF multi; 1759 1760 PetscFunctionBegin; 1761 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1762 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1763 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1764 ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr); 1765 PetscFunctionReturn(0); 1766 } 1767 1768 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 1769 { 1770 #if defined(PETSC_USE_DEBUG) 1771 PetscInt i, n, nleaves; 1772 const PetscInt *ilocal = NULL; 1773 PetscHSetI seen; 1774 PetscErrorCode ierr; 1775 1776 PetscFunctionBegin; 1777 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 1778 ierr = 
PetscHSetICreate(&seen);CHKERRQ(ierr); 1779 for (i = 0; i < nleaves; i++) { 1780 const PetscInt leaf = ilocal ? ilocal[i] : i; 1781 ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr); 1782 } 1783 ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr); 1784 if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique"); 1785 ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr); 1786 PetscFunctionReturn(0); 1787 #else 1788 PetscFunctionBegin; 1789 PetscFunctionReturn(0); 1790 #endif 1791 } 1792 1793 /*@ 1794 PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view 1795 1796 Input Parameters: 1797 + sfA - The first PetscSF 1798 - sfB - The second PetscSF 1799 1800 Output Parameters: 1801 . sfBA - The composite SF 1802 1803 Level: developer 1804 1805 Notes: 1806 Currently, the two SFs must be defined on congruent communicators and they must be true star 1807 forests, i.e. the same leaf is not connected with different roots. 1808 1809 sfA's leaf space and sfB's root space might be partially overlapped. The composition builds 1810 a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected 1811 nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a 1812 Bcast on sfA, then a Bcast on sfB, on connected nodes. 
.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode     ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt           i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool          denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  /* Both inputs must be true star forests (no leaf connected to two roots). */
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  /* If sfA's leaves are not contiguous, scatter its remote points into an array
     indexed directly by sfB's root numbering; rank = -1 marks "no connection". */
  if (localPointsA) {
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* sfA leaf outside sfB's root space: no path */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  /* Broadcast sfA's root locations down sfB, so every sfB leaf learns which sfA
     root (if any) it is transitively connected to. leafdataB is offset by
     minleaf so it can be indexed by absolute leaf numbers. */
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* First pass: count connected leaves and detect whether the result is dense
     (contiguous leaves, all connected) so ilocal can stay NULL. */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    /* Dense: hand leafdataB itself to the new SF (ownership transfers below). */
    localPointsBA  = NULL;
    remotePointsBA = leafdataB;
  } else {
    /* Sparse: second pass compacts only the connected leaves. */
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  /* PETSC_OWN_POINTER: the new SF takes ownership of both arrays. */
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

   Input Parameters:
+  sfA - The first PetscSF
-  sfB - The second PetscSF

   Output Parameters:
.  sfBA - The composite SF.

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
   a Reduce on sfB, on connected roots.
.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode     ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt           i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op             op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool          iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  /* Both inputs must be true star forests (no leaf connected to two roots). */
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPIU_REPLACE. In that case, MPI_MAXLOC and MPIU_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr);
  if (iswin) op = MPIU_REPLACE;
#endif

  /* Lay out sfA's remote points in an array indexed by sfB's leaf numbering
     (shifted by minleaf); rank = -1 marks leaves with no sfA connection. */
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; /* outside sfB's leaf range: no path */
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* localPointsA == NULL means sfA's leaves are contiguous starting at 0 */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  /* Over-allocate to numRootsB; only the first numLeavesBA entries end up used,
     and ownership passes to the new SF below. */
  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Reduce up sfB: each sfB root receives the sfA root location of its leaf
     (MAXLOC keeps the result deterministic if a root has several leaves). */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* Compact in place: keep only sfB roots that are connected to some sfA root. */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  /* PETSC_OWN_POINTER: the new SF takes ownership of both arrays. */
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
. out - The local PetscSF
*/
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm           comm;
  PetscMPIInt        myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode       *liremote;
  PetscSF            lsf;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the SF implementation build the local SF if it knows how. */
  if (sf->ops->CreateLocalSF) {
    ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
  } else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr);

    /* Find out local edges and build a local SF */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
    /* First pass: count leaves whose remote root lives on this rank. */
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);

    /* Second pass: copy those edges, remapping the root rank to 0 on PETSC_COMM_SELF. */
    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
    *out = lsf;
  }
  PetscFunctionReturn(0);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* NOTE: both begin and end of this blocking operation are logged under the
     PETSCSF_BcastAndOpBegin event. */
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  if (sf->ops->BcastToZero) {
    ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}