#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/hashseti.h>
#include <petscctable.h>

#if defined(PETSC_USE_DEBUG)
#  define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                              \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif

const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0};

/*@
   PetscSFCreate - create a star forest communication context

   Collective

   Input Arguments:
.  comm - communicator on which the star forest will operate

   Output Arguments:
.  sf - new star forest context

   Options Database Keys:
+  -sf_type basic    - Use MPI persistent Isend/Irecv for communication (Default)
.  -sf_type window   - Use MPI-3 one-sided window for communication
-  -sf_type neighbor - Use MPI-3 neighborhood collectives for communication

   Level: intermediate

   Notes:
   When one knows the communication graph is one of the predefined graphs, such as MPI_Alltoall, MPI_Allgatherv,
   MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
   SFs are optimized and they have better performance than general SFs.

.seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
{
  PetscErrorCode ierr;
  PetscSF        b;

  PetscFunctionBegin;
  PetscValidPointer(sf,2);
  ierr = PetscSFInitializePackage();CHKERRQ(ierr);

  ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);

  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_MAX_INT;
  b->maxleaf   = PETSC_MIN_INT;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;

  *sf = b;
  PetscFunctionReturn(0);
}
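/*
   Example (illustrative sketch, not part of the library): the typical PetscSF lifecycle.
   The graph here is made-up: every process has a single, contiguously stored leaf that
   references root 0 on rank 0.

     PetscSF        sf;
     PetscSFNode    remote;
     PetscErrorCode ierr;

     ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr);
     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
     remote.rank  = 0;
     remote.index = 0;
     ierr = PetscSFSetGraph(sf,1,1,NULL,PETSC_COPY_VALUES,&remote,PETSC_COPY_VALUES);CHKERRQ(ierr);
     ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
*/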
/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Arguments:
.  sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  if (sf->rmine_d) {cudaError_t err = cudaFree(sf->rmine_d);CHKERRCUDA(err);sf->rmine_d=NULL;}
#endif
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFSetType - Set the PetscSF communication implementation

   Collective on PetscSF

   Input Parameters:
+  sf - the PetscSF context
-  type - a known method

   Options Database Key:
.  -sf_type <type> - Sets the method; use -help for a list
   of available methods (for instance, window, pt2pt, neighbor)

   Notes:
   See "include/petscsf.h" for available methods (for instance)
+    PETSCSFWINDOW - MPI-2/3 one-sided
-    PETSCSFBASIC  - basic implementation using MPI-1 two-sided

   Level: intermediate

.seealso: PetscSFType, PetscSFCreate()
@*/
PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
{
  PetscErrorCode ierr,(*r)(PetscSF);
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidCharPointer(type,2);

  ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
  if (match) PetscFunctionReturn(0);

  ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
  if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
  /* Destroy the previous PetscSF implementation context */
  if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
  ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
  ierr = (*r)(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetType - Get the PetscSF communication implementation

   Not Collective

   Input Parameter:
.  sf - the PetscSF context

   Output Parameter:
.  type - the PetscSF type name

   Level: intermediate

.seealso: PetscSFSetType(), PetscSFCreate()
@*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
  PetscValidPointer(type,2);
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}

/*@
   PetscSFDestroy - destroy star forest

   Collective

   Input Arguments:
.  sf - address of star forest

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFReset()
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
{
#if defined(PETSC_USE_DEBUG)
  PetscInt           i, nleaves;
  PetscMPIInt        size;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  if (!sf->graphset) PetscFunctionReturn(0);
  ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  for (i = 0; i < nleaves; i++) {
    const PetscInt rank   = iremote[i].rank;
    const PetscInt remote = iremote[i].index;
    const PetscInt leaf   = ilocal ? ilocal[i] : i;
    if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
    if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
    if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
  }
  PetscFunctionReturn(0);
#else
  PetscFunctionBegin;
  PetscFunctionReturn(0);
#endif
}

/*@
   PetscSFSetUp - set up communication structures

   Collective

   Input Arguments:
.  sf - star forest communication object

   Level: beginner

.seealso: PetscSFSetFromOptions(), PetscSFSetType()
@*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (sf->setupcalled) PetscFunctionReturn(0);
  ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr);
  if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);}
  ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
  ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetFromOptions - set PetscSF options using the options database

   Logically Collective

   Input Arguments:
.  sf - star forest

   Options Database Keys:
+  -sf_type - implementation type, see PetscSFSetType()
-  -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise

   Level: intermediate

.seealso: PetscSFWindowSetSyncType()
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
  if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

   Logically Collective

   Input Arguments:
+  sf - star forest
-  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

   Level: advanced

.seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}
/*@
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Arguments:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
           during setup in debug mode)
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
           during setup in debug mode)
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
   In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode.

   Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
   encode contiguous storage. In such a case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
   needed.

   Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
   indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).

.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
  if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

  ierr = PetscSFReset(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  if (nleaves && ilocal) {
    PetscInt i;
    PetscInt minleaf = PETSC_MAX_INT;
    PetscInt maxleaf = PETSC_MIN_INT;
    int      contiguous = 1;
    for (i=0; i<nleaves; i++) {
      minleaf = PetscMin(minleaf,ilocal[i]);
      maxleaf = PetscMax(maxleaf,ilocal[i]);
      contiguous &= (ilocal[i] == i);
    }
    sf->minleaf = minleaf;
    sf->maxleaf = maxleaf;
    if (contiguous) {
      if (localmode == PETSC_OWN_POINTER) {
        ierr = PetscFree(ilocal);CHKERRQ(ierr);
      }
      ilocal = NULL;
    }
  } else {
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
  }

  if (ilocal) {
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
      sf->mine = sf->mine_alloc;
      break;
    case PETSC_OWN_POINTER:
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      sf->mine_alloc = NULL;
      sf->mine       = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }

  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    sf->remote_alloc = NULL;
    sf->remote       = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
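/*
   Example (illustrative sketch, not part of the library): each process owns two roots
   and has two contiguously stored leaves (ilocal = NULL) that mirror the two roots of
   the next process in a periodic ring. The sizes and connectivity are made-up.

     PetscSF        sf;
     PetscSFNode    iremote[2];
     PetscMPIInt    rank,size;
     PetscInt       i;
     PetscErrorCode ierr;

     ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
     for (i=0; i<2; i++) {
       iremote[i].rank  = (rank+1)%size;
       iremote[i].index = i;
     }
     ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr);
     ierr = PetscSFSetGraph(sf,2,2,NULL,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
*/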
/*@
   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

   Collective

   Input Parameters:
+  sf      - The PetscSF
.  map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
-  pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

   Notes:
   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map,
   with n and N being the local and global sizes of x respectively.

   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph such that a Bcast on it copies x to
   sequential vectors y on all ranks.

   With PETSCSF_PATTERN_GATHER, the routine creates a graph such that a Bcast on it copies x to a
   sequential vector y on rank 0.

   In the above cases, entries of x are roots and entries of y are leaves.

   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is the size of sf's communicator. The routine
   creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
   of rank j, where 0 <= i,j < NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that this does
   not mean one cannot send multiple items. One just needs to create a new MPI datatype for the multiple data
   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

   In this case, roots and leaves are symmetric.

   Level: intermediate
@*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr   = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr   = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check whether n is the same over all ranks so that we can optimize it */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots  = n;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    sf->nleaves = rank ? 0 : N;
    sf->nroots  = n;
    sf->nranks  = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves = size;
    sf->nroots  = size;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
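/*
   Example (illustrative sketch, not part of the library): replicate the local parts of
   a distributed array on every process with the ALLGATHER pattern. The local size n = 2
   and the data values are made-up.

     PetscSF        sf;
     PetscLayout    map;
     PetscInt       i,n = 2,N;
     PetscScalar    rootdata[2],*leafdata;
     PetscErrorCode ierr;

     ierr = PetscLayoutCreate(PETSC_COMM_WORLD,&map);CHKERRQ(ierr);
     ierr = PetscLayoutSetLocalSize(map,n);CHKERRQ(ierr);
     ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
     ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
     ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr);
     ierr = PetscSFSetGraphWithPattern(sf,map,PETSCSF_PATTERN_ALLGATHER);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&leafdata);CHKERRQ(ierr);
     for (i=0; i<n; i++) rootdata[i] = 1.0;
     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,rootdata,leafdata);CHKERRQ(ierr);
     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,rootdata,leafdata);CHKERRQ(ierr);
     ierr = PetscFree(leafdata);CHKERRQ(ierr);
     ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);
     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
*/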
/*@
   PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

   Collective

   Input Arguments:
.  sf - star forest to invert

   Output Arguments:
.  isf - inverse of sf

   Level: advanced

   Notes:
   All roots must have degree 1.

   The local space may be a permutation, but cannot be sparse.

.seealso: PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  for (i=0; i<nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL;
  else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
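/*
   Example (illustrative sketch, not part of the library): invert a one-to-one SF. In
   isf the old leaf space becomes the root space, so a Bcast on isf moves data in the
   direction a Reduce on sf would. The buffers atleaves/atroots are hypothetical and
   sized for sf's leaf and root spaces respectively.

     PetscSF        isf;
     PetscErrorCode ierr;

     ierr = PetscSFCreateInverseSF(sf,&isf);CHKERRQ(ierr);
     ierr = PetscSFBcastBegin(isf,MPIU_INT,atleaves,atroots);CHKERRQ(ierr);
     ierr = PetscSFBcastEnd(isf,MPIU_INT,atleaves,atroots);CHKERRQ(ierr);
     ierr = PetscSFDestroy(&isf);CHKERRQ(ierr);
*/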
/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Arguments:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Arguments:
.  newsf - new communication object

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGraph - Get the graph specifying a parallel star forest

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers
-  iremote - remote locations of root vertices for each leaf on the current process

   Notes:
   We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet

   When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you
   want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->GetGraph) {
    ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
  } else {
    if (nroots)  *nroots  = sf->nroots;
    if (nleaves) *nleaves = sf->nleaves;
    if (ilocal)  *ilocal  = sf->mine;
    if (iremote) *iremote = sf->remote;
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetLeafRange - Get the active leaf ranges

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
-  maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.

   Level: developer

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}
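/*
   Example (illustrative sketch, not part of the library): inspect the graph of an
   existing SF named sf. An ilocal of NULL means the leaves are stored contiguously.

     PetscInt          i,nroots,nleaves;
     const PetscInt    *ilocal;
     const PetscSFNode *iremote;
     PetscErrorCode    ierr;

     ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
     for (i=0; i<nleaves; i++) {
       const PetscInt leaf = ilocal ? ilocal[i] : i;
       ierr = PetscPrintf(PETSC_COMM_SELF,"leaf %D <- (%D,%D)\n",leaf,iremote[i].rank,iremote[i].index);CHKERRQ(ierr);
     }
*/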
/*@C
   PetscSFView - view a star forest

   Collective

   Input Arguments:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
.  roffset - offset in rmine/rremote for each rank (length nranks+1)
.  rmine - concatenated array holding local indices referencing each remote rank
-  rremote - concatenated array holding remote indices referenced for each remote rank

   Level: developer

.seealso: PetscSFGetLeafRanks()
@*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetRootRanks) {
    ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
  } else {
    /* The generic implementation */
    if (nranks)  *nranks  = sf->nranks;
    if (ranks)   *ranks   = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine)   *rmine   = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  niranks - number of leaf ranks referencing roots on this process
.  iranks - array of ranks
.  ioffset - offset in irootloc for each rank (length niranks+1)
-  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetLeafRanks) {
    ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
  } else {
    PetscSFType type;
    ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
  }
  PetscFunctionReturn(0);
}

static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list)
{
  PetscInt i;
  for (i=0; i<n; i++) {
    if (needle == list[i]) return PETSC_TRUE;
  }
  return PETSC_FALSE;
}
/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

   Collective

   Input Arguments:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--;             /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group   group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i; ) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for ( ; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) {      /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;
  }
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Argument:
.  sf - star forest

   Output Arguments:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);

    /* Enumerate the incoming ranks */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}
/*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Argument:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Arguments:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:
   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.

.seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
    ierr   = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    *multi = sf->multi;
    PetscFunctionReturn(0);
  }
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1;   /* TODO: We should use PetscSFGetLeafRange() */
    ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
#if 0
#if defined(PETSC_USE_DEBUG)                                 /* Check that the expected number of increments occurred */
    for (i=0; i<sf->nroots; i++) {
      if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
    }
#endif
#endif
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) {        /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
    }
    ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}
/*@C
   PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices

   Collective

   Input Arguments:
+  sf - original star forest
.  nselected - number of selected roots on this process
-  selected - indices of the selected roots on this process

   Output Arguments:
.  newsf - new star forest

   Level: advanced

   Note:
   To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
   be done by calling PetscSFGetGraph().

.seealso: PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  PetscInt          i,n,*roots,*rootdata,*leafdata,nroots,nleaves,connected_leaves,*new_ilocal;
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  PetscSF           tmpsf;
  MPI_Comm          comm;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (nselected) PetscValidPointer(selected,3);
  PetscValidPointer(newsf,4);

  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  /* Remove duplicates from selected[] and put the results in roots[] */
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscMalloc1(nselected,&roots);CHKERRQ(ierr);
  ierr = PetscArraycpy(roots,selected,nselected);CHKERRQ(ierr);
  ierr = PetscSortedRemoveDupsInt(&nselected,roots);CHKERRQ(ierr);
  if (nselected && (roots[0] < 0 || roots[nselected-1] >= sf->nroots)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %D/%D are not in [0,%D)",roots[0],roots[nselected-1],sf->nroots);

  if (sf->ops->CreateEmbeddedSF) {
    ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,roots,newsf);CHKERRQ(ierr);
  } else {
    /* A generic version of creating an embedded sf. Note that we call PetscSFSetGraph() twice, which is certainly expensive */
    /* Find out which leaves (not leaf data items) are still connected to roots in the embedded sf */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,NULL,&iremote);CHKERRQ(ierr);
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&tmpsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(tmpsf,nroots,nleaves,NULL/*contiguous*/,PETSC_USE_POINTER,iremote,PETSC_USE_POINTER);CHKERRQ(ierr);
    ierr = PetscCalloc2(nroots,&rootdata,nleaves,&leafdata);CHKERRQ(ierr);
    for (i=0; i<nselected; ++i) rootdata[roots[i]] = 1;
    ierr = PetscSFBcastBegin(tmpsf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(tmpsf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&tmpsf);CHKERRQ(ierr);

    /* Build newsf with leaves that are still connected */
    connected_leaves = 0;
    for (i=0; i<nleaves; ++i) connected_leaves += leafdata[i];
    ierr = PetscMalloc1(connected_leaves,&new_ilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(connected_leaves,&new_iremote);CHKERRQ(ierr);
    for (i=0, n=0; i<nleaves; ++i) {
      if (leafdata[i]) {
        new_ilocal[n]        = sf->mine ? sf->mine[i] : i;
        new_iremote[n].rank  = sf->remote[i].rank;
        new_iremote[n].index = sf->remote[i].index;
        ++n;
      }
    }
    if (n != connected_leaves) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"There is a size mismatch in the SF embedding, %d != %d",n,connected_leaves);
    ierr = PetscSFCreate(comm,newsf);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(*newsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(*newsf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
  }
  ierr = PetscFree(roots);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
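/*
   Example (illustrative sketch, not part of the library): keep only the edges ending
   at two particular local roots of an existing SF named sf; the root indices are
   made-up and assumed sorted.

     PetscSF        esf;
     PetscInt       selected[2] = {0,3};
     PetscErrorCode ierr;

     ierr = PetscSFCreateEmbeddedSF(sf,2,selected,&esf);CHKERRQ(ierr);
     ierr = PetscSFView(esf,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = PetscSFDestroy(&esf);CHKERRQ(ierr);
*/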
/*@C
   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices

   Collective

   Input Arguments:
+  sf - original star forest
.  nselected - number of selected leaves on this process
-  selected - indices of the selected leaves on this process

   Output Arguments:
.  newsf - new star forest

   Level: advanced

.seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  const PetscInt    *ilocal;
  PetscInt          i,nroots,*leaves,*new_ilocal;
  MPI_Comm          comm;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (nselected) PetscValidPointer(selected,3);
  PetscValidPointer(newsf,4);

  /* Remove duplicates from selected[] and put the results in leaves[] */
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr);
  ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr);
  ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr);
  if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);

  /* Optimize the routine only when sf is set up and hence we can reuse sf's communication pattern */
  if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
    ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr);
  } else {
    ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
    ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
    for (i=0; i<nselected; ++i) {
      const PetscInt l     = leaves[i];
      new_ilocal[i]        = ilocal ? ilocal[l] : l;
      new_iremote[i].rank  = iremote[l].rank;
      new_iremote[i].index = iremote[l].index;
    }
    ierr = PetscSFCreate(comm,newsf);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(*newsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  }
  ierr = PetscFree(leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*@C
   PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd()

   Collective on PetscSF

   Input Arguments:
+  sf - star forest on which to communicate
.  unit - data type associated with each node
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Arguments:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* Shall we assume rootdata, leafdata are ready to use (instead of being computed by some asynchronous kernels)?
     To be similar to MPI, I'd like to have this assumption, since MPI does not have a concept of stream.
     But currently this assumption is not enforced in PETSc. To be safe, I do synchronization here. Otherwise, if
     we do not sync now and call the Pack kernel directly on the default NULL stream (assume petsc objects are also
     computed on it), we have to sync the NULL stream before calling MPI routines. So, it looks like a
     cudaDeviceSynchronize() is inevitable. We do it now and put pack/unpack kernels on non-NULL streams.
   */
  if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);}
#endif
  ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Arguments:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFReduceEnd()
@*/
PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
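/*
   Example (illustrative sketch, not part of the library): add each root value into
   the corresponding leaf entries of an existing SF named sf; rootdata and leafdata
   are assumed to match the graph. With op = MPIU_REPLACE this degenerates to a plain
   broadcast, which is what PetscSFBcastBegin()/PetscSFBcastEnd() provide. Local
   work may be overlapped with communication between the Begin and End calls.

     ierr = PetscSFBcastAndOpBegin(sf,MPIU_INT,rootdata,leafdata,MPI_SUM);CHKERRQ(ierr);
     ierr = PetscSFBcastAndOpEnd(sf,MPIU_INT,rootdata,leafdata,MPI_SUM);CHKERRQ(ierr);
*/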
/*@C
   PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Arguments:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFBcastBegin()
@*/
PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);}
#endif
  ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Arguments:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = (*sf->ops->ReduceEnd)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
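/*
   Example (illustrative sketch, not part of the library): sum leaf contributions into
   their roots. Roots referenced by no leaf keep their prior value, so rootdata is
   zeroed first; sf, nroots, leafdata, and rootdata are assumed to exist and match.

     PetscInt i;
     for (i=0; i<nroots; i++) rootdata[i] = 0;
     ierr = PetscSFReduceBegin(sf,MPIU_INT,leafdata,rootdata,MPI_SUM);CHKERRQ(ierr);
     ierr = PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,MPI_SUM);CHKERRQ(ierr);
*/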
/*@C
   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Arguments:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
   integers.

.seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype,leafupdatemtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
  if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
#if defined(PETSC_HAVE_CUDA)
  if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);}
#endif
  ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Arguments:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype,leafupdatemtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
  if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
  ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
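/*
   Example (illustrative sketch, not part of the library): hand out consecutive slots
   at each root, the same trick PetscSFGetMultiSF() uses above. rootdata[r] holds the
   next free slot of root r; after the calls, leafupdate[i] is the slot fetched by
   leaf i. sf, nroots, nleaves, and the three buffers are assumed to exist and match.

     PetscInt i;
     for (i=0; i<nroots; i++)  rootdata[i] = 0;
     for (i=0; i<nleaves; i++) leafdata[i] = 1;
     ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,rootdata,leafdata,leafupdate,MPI_SUM);CHKERRQ(ierr);
     ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,rootdata,leafdata,leafupdate,MPI_SUM);CHKERRQ(ierr);
*/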

/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence there is no need to call PetscFree() on it.

.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal = sf->maxleaf+1;  /* TODO: We should use PetscSFGetLeafRange() */
    if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  *degree = NULL;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence there is no need to call PetscFree() on it.

.seealso: PetscSFGatherEnd(), PetscSFComputeDegreeBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    sf->degreeknown = PETSC_TRUE;
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}
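
/*
   [Editor's note: illustrative usage sketch, not part of the original PETSc source.]
   Typical use of the degree pair above; the returned array is borrowed from sf, so the caller reads
   it but never frees it.  nroots is assumed to be the root count passed to PetscSFSetGraph().

     PetscErrorCode  ierr;
     const PetscInt *degree;
     PetscInt        i,nslots = 0;

     ierr = PetscSFComputeDegreeBegin(sf,&degree);CHKERRQ(ierr);
     ierr = PetscSFComputeDegreeEnd(sf,&degree);CHKERRQ(ierr);
     for (i=0; i<nroots; i++) nslots += degree[i];   // e.g. total space needed for a following PetscSFGatherBegin()
*/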

/*@C
   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
   Each multi-root is assigned the index of the corresponding original root.

   Collective

   Input Arguments:
+  sf - star forest
-  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

   Output Arguments:
+  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
-  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

   Level: developer

   Notes:
   The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.

.seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
@*/
PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
{
  PetscSF        msf;
  PetscInt       i, j, k, nroots, nmroots;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
  if (nroots) PetscValidIntPointer(degree,2);
  if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
  PetscValidPointer(multiRootsOrigNumbering,4);
  ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr);
  for (i=0,j=0,k=0; i<nroots; i++) {
    if (!degree[i]) continue;
    for (j=0; j<degree[i]; j++,k++) {
      (*multiRootsOrigNumbering)[k] = i;
    }
  }
#if defined(PETSC_USE_DEBUG)
  if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check failed");
#endif
  if (nMultiRoots) *nMultiRoots = nmroots;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Argument:
.  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Argument:
.  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
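
/*
   [Editor's note: illustrative usage sketch, not part of the original PETSc source.]
   The gather pair above needs a multi-root buffer sized by the root degrees, which is the usual
   reason PetscSFComputeDegreeBegin()/End() is called first.  sf, nroots and nleaves are assumed to
   be caller-provided and consistent with the graph.

     PetscErrorCode  ierr;
     const PetscInt *degree;
     PetscInt       *leafdata,*multirootdata,i,nslots = 0;

     ierr = PetscSFComputeDegreeBegin(sf,&degree);CHKERRQ(ierr);
     ierr = PetscSFComputeDegreeEnd(sf,&degree);CHKERRQ(ierr);
     for (i=0; i<nroots; i++) nslots += degree[i];               // one slot per incoming leaf
     ierr = PetscMalloc1(nleaves,&leafdata);CHKERRQ(ierr);
     for (i=0; i<nleaves; i++) leafdata[i] = i;                  // values to deposit at the roots
     ierr = PetscMalloc1(nslots,&multirootdata);CHKERRQ(ierr);
     ierr = PetscSFGatherBegin(sf,MPIU_INT,leafdata,multirootdata);CHKERRQ(ierr);
     ierr = PetscSFGatherEnd(sf,MPIU_INT,leafdata,multirootdata);CHKERRQ(ierr);
     // multirootdata now holds every leaf's value, grouped contiguously by owning root
*/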

/*@C
   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFGatherEnd()
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
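
/*
   [Editor's note: illustrative usage sketch, not part of the original PETSc source.]
   Scatter is the inverse of the gather above: each leaf receives the one unit sitting in its
   personal multi-root slot, rather than a value broadcast to all leaves of the root.  multirootdata
   is assumed to have been filled as in the gather sketch, and nleaves to match the graph.

     PetscErrorCode ierr;
     PetscInt       *leafdata;

     ierr = PetscMalloc1(nleaves,&leafdata);CHKERRQ(ierr);
     ierr = PetscSFScatterBegin(sf,MPIU_INT,multirootdata,leafdata);CHKERRQ(ierr);
     ierr = PetscSFScatterEnd(sf,MPIU_INT,multirootdata,leafdata);CHKERRQ(ierr);
     // leafdata[i] is the entry of multirootdata that was reserved for leaf i
*/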

static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
{
#if defined(PETSC_USE_DEBUG)
  PetscInt        i, n, nleaves;
  const PetscInt *ilocal = NULL;
  PetscHSetI      seen;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  ierr = PetscHSetICreate(&seen);CHKERRQ(ierr);
  for (i = 0; i < nleaves; i++) {
    const PetscInt leaf = ilocal ? ilocal[i] : i;
    ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
  }
  ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
  if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
  ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
  PetscFunctionReturn(0);
#else
  PetscFunctionBegin;
  PetscFunctionReturn(0);
#endif
}

/*@
   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

   Input Parameters:
+  sfA - The first PetscSF, whose local space may be a permutation, but cannot be sparse.
-  sfB - The second PetscSF, whose number of roots must be equal to the number of leaves of sfA on each processor

   Output Parameters:
.  sfBA - The composite SF. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then a Bcast on sfB

   Level: developer

   Notes:
   For the resulting composed SF to be valid, the input SFs must be true star forests: the leaves must be unique. This is
   checked in debug mode.

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode     ierr;
  MPI_Comm           comm;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB,*localPointsBA;
  PetscInt           i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscObjectGetComm((PetscObject)sfA,&comm);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  ierr = PetscSFGetLeafRange(sfA,&minleaf,&maxleaf);CHKERRQ(ierr);
  if (numRootsB != numLeavesA) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The second SF's number of roots must be equal to the first SF's number of leaves");
  if (maxleaf+1 != numLeavesA || minleaf) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The first SF cannot have a sparse local space");
  /* The above check is fast, but not sufficient, since we cannot guarantee that the SF has unique leaves. So in debug
     mode, check properly. */
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  if (localPointsA) {
    /* Local space is a dense permutation of the identity. Need to rewire the order of the remote points */
    ierr = PetscMalloc1(numLeavesA,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numLeavesA; i++) reorderedRemotePointsA[localPointsA[i]-minleaf] = remotePointsA[i];
    remotePointsA = reorderedRemotePointsA;
  }
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* sfB's leaves must be unique, otherwise BcastAndOp(B o A) != BcastAndOp(B) o BcastAndOp(A) */
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);
  if (minleaf == 0 && maxleaf + 1 == numLeavesB) { /* Local space of sfB is an identity or permutation */
    localPointsBA  = NULL;
    remotePointsBA = leafdataB;
  } else {
    localPointsBA = localPointsB;
    ierr = PetscMalloc1(numLeavesB,&remotePointsBA);CHKERRQ(ierr);
    for (i=0; i<numLeavesB; i++) remotePointsBA[i] = leafdataB[localPointsB[i]-minleaf];
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(comm, sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesB,localPointsBA,PETSC_COPY_VALUES,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
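
/*
   [Editor's note: illustrative usage sketch, not part of the original PETSc source.]
   Composition chains two communication patterns: if sfA maps its roots onto intermediate points and
   sfB maps those intermediate points onto final leaves, the composite performs both hops at once.
   sfA and sfB are assumed to already have graphs satisfying the restrictions documented above.

     PetscErrorCode ierr;
     PetscSF        sfBA;

     ierr = PetscSFCompose(sfA,sfB,&sfBA);CHKERRQ(ierr);
     // A Bcast of rootdata on sfBA now yields the same leaf values as a Bcast on sfA
     // followed by a Bcast of that intermediate result on sfB.
     ierr = PetscSFDestroy(&sfBA);CHKERRQ(ierr);
*/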

/*@
   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

   Input Parameters:
+  sfA - The first PetscSF, whose local space may be a permutation, but cannot be sparse.
-  sfB - The second PetscSF, whose number of leaves must be equal to the number of leaves of sfA on each processor. All roots must have degree 1.

   Output Parameters:
.  sfBA - The composite SF. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then a Bcast on the inverse of sfB

   Level: developer

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode     ierr;
  MPI_Comm           comm;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt           numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscObjectGetComm((PetscObject) sfA, &comm);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);
  ierr = PetscSFGetLeafRange(sfA,&minleaf,&maxleaf);CHKERRQ(ierr);
  if (maxleaf+1-minleaf != numLeavesA) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The first SF cannot have a sparse local space");
  if (numLeavesA != numLeavesB) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The second SF's number of leaves must be equal to the first SF's number of leaves");
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,remotePointsA,remotePointsBA,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,remotePointsA,remotePointsBA,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFCreate(comm,sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numRootsB,NULL,PETSC_COPY_VALUES,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
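
/*
   [Editor's note: illustrative usage sketch, not part of the original PETSc source.]
   A hypothetical use of the routine above: given sfA (roots -> leaves) and sfB describing a second,
   degree-1 assignment of the same leaves to a different set of roots, the composite maps sfA's
   roots directly onto sfB's roots.

     PetscErrorCode ierr;
     PetscSF        sfBA;

     ierr = PetscSFComposeInverse(sfA,sfB,&sfBA);CHKERRQ(ierr);
     // A Bcast on sfBA behaves like a Bcast on sfA followed by pushing the result
     // back to sfB's roots (a Reduce with MPIU_REPLACE, valid because all roots of sfB have degree 1).
     ierr = PetscSFDestroy(&sfBA);CHKERRQ(ierr);
*/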

/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
. out - The local PetscSF
 */
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm           comm;
  PetscMPIInt        myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode       *liremote;
  PetscSF            lsf;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->CreateLocalSF) {
    ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
  } else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr);

    /* Find out local edges and build a local SF */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);

    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
    *out = lsf;
  }
  PetscFunctionReturn(0);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  if (sf->ops->BcastToZero) {
    ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}