#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/hashseti.h>
#include <petscctable.h>

/* In debug builds, error out unless PetscSFSetGraph()/PetscSFSetGraphWithPattern()
   has been called on argument number arg (named sf); compiles away otherwise. */
#if defined(PETSC_USE_DEBUG)
#  define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                              \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif

/* String names for PetscSFDuplicateOption values, used by options/enum processing */
const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0};

/*@
   PetscSFCreate - create a star forest communication context

   Collective

   Input Arguments:
.  comm - communicator on which the star forest will operate

   Output Arguments:
.  sf - new star forest context

   Options Database Keys:
+  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
.  -sf_type window    -Use MPI-3 one-sided window for communication
-  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication

   Level: intermediate

   Notes:
   When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
   MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
   SFs are optimized and they have better performance than general SFs.

.seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
{
  PetscErrorCode ierr;
  PetscSF        b;

  PetscFunctionBegin;
  PetscValidPointer(sf,2);
  ierr = PetscSFInitializePackage();CHKERRQ(ierr);

  ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);

  /* Sentinel values: no graph has been provided yet */
  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_MAX_INT;
  b->maxleaf   = PETSC_MIN_INT;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;

  *sf = b;
  PetscFunctionReturn(0);
}

/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Arguments:
.  sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the implementation release its type-specific data first */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  /* Return the graph-describing fields to their PetscSFCreate() state */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* rmine_d is a device-memory mirror of rmine; release it via the CUDA runtime */
  if (sf->rmine_d) {cudaError_t err = cudaFree(sf->rmine_d);CHKERRCUDA(err);sf->rmine_d=NULL;}
#endif
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
  ierr =
PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFSetType - Set the PetscSF communication implementation

   Collective on PetscSF

   Input Parameters:
+  sf - the PetscSF context
-  type - a known method

   Options Database Key:
.  -sf_type <type> - Sets the method; use -help for a list
   of available methods (for instance, window, pt2pt, neighbor)

   Notes:
   See "include/petscsf.h" for available methods (for instance)
+    PETSCSFWINDOW - MPI-2/3 one-sided
-    PETSCSFBASIC - basic implementation using MPI-1 two-sided

  Level: intermediate

.seealso: PetscSFType, PetscSFCreate()
@*/
PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
{
  PetscErrorCode ierr,(*r)(PetscSF);
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidCharPointer(type,2);

  /* No-op if the SF already has the requested type */
  ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
  if (match) PetscFunctionReturn(0);

  /* Look up the constructor registered for this type name */
  ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
  if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
  /* Destroy the previous PetscSF implementation context */
  if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
  ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
  ierr = (*r)(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
  PetscSFGetType - Get the PetscSF communication implementation

  Not Collective

  Input Parameter:
. sf - the PetscSF context

  Output Parameter:
.
type - the PetscSF type name

  Level: intermediate

.seealso: PetscSFSetType(), PetscSFCreate()
@*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
  PetscValidPointer(type,2);
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}

/*@
   PetscSFDestroy - destroy star forest

   Collective

   Input Arguments:
.  sf - address of star forest

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFReset()
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Only tear down when the last reference is dropped */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Debug-only sanity check: every leaf must reference a valid remote (rank,index)
   pair and a nonnegative local location. No-op in optimized builds. */
static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
{
#if defined(PETSC_USE_DEBUG)
  PetscInt           i, nleaves;
  PetscMPIInt        size;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  if (!sf->graphset) PetscFunctionReturn(0);
  ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  for (i = 0; i < nleaves; i++) {
    const PetscInt rank   = iremote[i].rank;
    const PetscInt remote = iremote[i].index;
    /* NULL ilocal means contiguous leaf storage, i.e. leaf i lives at location i */
    const PetscInt leaf   = ilocal ?
ilocal[i] : i;
    if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
    if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
    if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
  }
  PetscFunctionReturn(0);
#else
  /* Optimized builds: validation is skipped entirely */
  PetscFunctionBegin;
  PetscFunctionReturn(0);
#endif
}

/*@
   PetscSFSetUp - set up communication structures

   Collective

   Input Arguments:
.  sf - star forest communication object

   Level: beginner

.seealso: PetscSFSetFromOptions(), PetscSFSetType()
@*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (sf->setupcalled) PetscFunctionReturn(0);
  ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr);
  /* Fall back to the default implementation if the user never chose one */
  if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);}
  ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
  ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetFromOptions - set PetscSF options using the options database

   Logically Collective

   Input Arguments:
.
sf - star forest

   Options Database Keys:
+  -sf_type - implementation type, see PetscSFSetType()
-  -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise

   Level: intermediate

.seealso: PetscSFWindowSetSyncType()
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
  /* Default to the current type if one is already set, otherwise to basic */
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
  /* Give the implementation a chance to process its own options */
  if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

   Logically Collective

   Input Arguments:
+  sf - star forest
-  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

   Level: advanced

.seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  /* The multi-SF encodes the ordering; once it exists the choice cannot change */
  if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Arguments:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
during setup in debug mode)
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
during setup in debug mode)
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
   In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode

   Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
   encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
   needed

   Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
   indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).

.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
  if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

  /* Discard any previously-set graph before installing the new one */
  ierr = PetscSFReset(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  if (nleaves && ilocal) {
    PetscInt i;
    PetscInt minleaf = PETSC_MAX_INT;
    PetscInt maxleaf = PETSC_MIN_INT;
    int      contiguous = 1;
    /* One sweep computes the leaf range and detects the identity permutation */
    for (i=0; i<nleaves; i++) {
      minleaf = PetscMin(minleaf,ilocal[i]);
      maxleaf = PetscMax(maxleaf,ilocal[i]);
      contiguous &= (ilocal[i] == i);
    }
    sf->minleaf = minleaf;
    sf->maxleaf = maxleaf;
    if (contiguous) {
      /* Identity ilocal encodes contiguous storage: drop it (free it if we own it) */
      if (localmode == PETSC_OWN_POINTER) {
        ierr = PetscFree(ilocal);CHKERRQ(ierr);
      }
      ilocal = NULL;
    }
  } else {
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
  }

  /* Store ilocal according to the requested copy mode */
  if (ilocal) {
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
      sf->mine = sf->mine_alloc;
      break;
    case PETSC_OWN_POINTER:
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      sf->mine_alloc = NULL;
      sf->mine       = (PetscInt*)ilocal;
      break;
    default:
SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }

  /* Store iremote according to the requested copy mode */
  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    sf->remote_alloc = NULL;
    sf->remote       = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
  PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

  Collective

  Input Parameters:
+ sf      - The PetscSF
. map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
- pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

  Notes:
  It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
  n and N are local and global sizes of x respectively.

  With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
  sequential vectors y on all ranks.

  With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
  sequential vector y on rank 0.

  In above cases, entries of x are roots and entries of y are leaves.

  With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
  creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
  of rank j.
Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
  not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data
  items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

  In this case, roots and leaves are symmetric.

  Level: intermediate
 @*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* Each rank owns "size" roots; build the trivial layout ourselves since map is ignored */
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ?
PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots  = n;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves in the gather pattern */
    sf->nleaves = rank ? 0 : N;
    sf->nroots  = n;
    sf->nranks  = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves = size;
    sf->nroots  = size;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
  PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

  Collective

  Input Arguments:

. sf - star forest to invert

  Output Arguments:
. isf - inverse of sf
  Level: advanced

  Notes:
  All roots must have degree 1.

  The local space may be a permutation, but cannot be sparse.

.seealso: PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  /* Each leaf location advertises its own (rank,location) pair */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* Roots start invalid so unreferenced roots can be detected after the reduce */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  /* Push each leaf's identity to the root it references; degree-1 roots receive exactly one value */
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL;
  else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves).
*/
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  /* newilocal ownership transfers to the new SF (PETSC_OWN_POINTER); roots is copied */
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Arguments:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Arguments:
.  newsf - new communication object

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      /* Patterned graphs are reproduced from the pattern and layout, not leaf lists */
      ierr =
PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
  /* Let the implementation duplicate its type-specific data as well */
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGraph - Get the graph specifying a parallel star forest

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers
-  iremote - remote locations of root vertices for each leaf on the current process

   Notes:
   We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Patterned implementations may synthesize the graph on demand */
  if (sf->ops->GetGraph) {
    ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
  } else {
    if (nroots) *nroots = sf->nroots;
    if (nleaves) *nleaves = sf->nleaves;
    if (ilocal) *ilocal = sf->mine;
    if (iremote) *iremote = sf->remote;
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetLeafRange - Get the active leaf ranges

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
-  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.

   Level: developer

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
{

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFView - view a star forest

   Collective

   Input Arguments:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Default to stdout on the SF's communicator when no viewer is given */
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      ierr =
PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        /* Sort a permutation of the ranks so the detail output is ordered by rank */
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
.
roffset - offset in rmine/rremote for each rank (length nranks+1)
.  rmine - concatenated array holding local indices referencing each remote rank
-  rremote - concatenated array holding remote indices referenced for each remote rank

   Level: developer

.seealso: PetscSFGetLeafRanks()
@*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* These arrays are built during setup (see PetscSFSetUpRanks) */
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetRootRanks) {
    ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
  } else {
    /* The generic implementation */
    if (nranks) *nranks = sf->nranks;
    if (ranks) *ranks = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine) *rmine = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  niranks - number of leaf ranks referencing roots on this process
.  iranks - array of ranks
.
ioffset - offset in irootloc for each rank (length niranks+1)
-  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  /* No generic fallback: only implementations that track leaf ranks can answer */
  if (sf->ops->GetLeafRanks) {
    ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
  } else {
    PetscSFType type;
    ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
  }
  PetscFunctionReturn(0);
}

/* Return PETSC_TRUE if needle occurs in the (unsorted) list of length n; linear
   scan, acceptable because the distinguished group is expected to be small */
static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
  PetscInt i;
  for (i=0; i<n; i++) {
    if (needle == list[i]) return PETSC_TRUE;
  }
  return PETSC_FALSE;
}

/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

   Collective

   Input Arguments:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  /* Count leaves per referenced remote rank using a hash table keyed by rank */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--; /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group   group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate the distinguished group's members into this communicator's rank numbering */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize)
{ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);} 911 ierr = MPI_Group_free(&group);CHKERRQ(ierr); 912 ierr = PetscFree(dgroupranks);CHKERRQ(ierr); 913 } 914 915 /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */ 916 for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i; ) { 917 for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */ 918 if (InList(ranks[i],groupsize,groupranks)) break; 919 } 920 for ( ; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */ 921 if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break; 922 } 923 if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */ 924 PetscInt tmprank,tmpcount; 925 926 tmprank = ranks[i]; 927 tmpcount = rcount[i]; 928 ranks[i] = ranks[sf->ndranks]; 929 rcount[i] = rcount[sf->ndranks]; 930 ranks[sf->ndranks] = tmprank; 931 rcount[sf->ndranks] = tmpcount; 932 sf->ndranks++; 933 } 934 } 935 ierr = PetscFree(groupranks);CHKERRQ(ierr); 936 ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr); 937 ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr); 938 sf->roffset[0] = 0; 939 for (i=0; i<sf->nranks; i++) { 940 ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr); 941 sf->roffset[i+1] = sf->roffset[i] + rcount[i]; 942 rcount[i] = 0; 943 } 944 for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) { 945 /* short circuit */ 946 if (orank != sf->remote[i].rank) { 947 /* Search for index of iremote[i].rank in sf->ranks */ 948 ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr); 949 if (irank < 0) { 950 ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr); 951 if (irank >= 0) irank += sf->ndranks; 952 } 953 orank = sf->remote[i].rank; 954 } 955 if (irank < 0) 
SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank); 956 sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i; 957 sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index; 958 rcount[irank]++; 959 } 960 ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr); 961 PetscFunctionReturn(0); 962 } 963 964 /*@C 965 PetscSFGetGroups - gets incoming and outgoing process groups 966 967 Collective 968 969 Input Argument: 970 . sf - star forest 971 972 Output Arguments: 973 + incoming - group of origin processes for incoming edges (leaves that reference my roots) 974 - outgoing - group of destination processes for outgoing edges (roots that I reference) 975 976 Level: developer 977 978 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow() 979 @*/ 980 PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing) 981 { 982 PetscErrorCode ierr; 983 MPI_Group group = MPI_GROUP_NULL; 984 985 PetscFunctionBegin; 986 if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining groups"); 987 if (sf->ingroup == MPI_GROUP_NULL) { 988 PetscInt i; 989 const PetscInt *indegree; 990 PetscMPIInt rank,*outranks,*inranks; 991 PetscSFNode *remote; 992 PetscSF bgcount; 993 994 /* Compute the number of incoming ranks */ 995 ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr); 996 for (i=0; i<sf->nranks; i++) { 997 remote[i].rank = sf->ranks[i]; 998 remote[i].index = 0; 999 } 1000 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr); 1001 ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1002 ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr); 1003 ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr); 1004 1005 /* Enumerate the incoming ranks */ 1006 ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr); 1007 ierr = 
MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 1008 for (i=0; i<sf->nranks; i++) outranks[i] = rank; 1009 ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr); 1010 ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr); 1011 ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr); 1012 ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr); 1013 ierr = MPI_Group_free(&group);CHKERRQ(ierr); 1014 ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr); 1015 ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr); 1016 } 1017 *incoming = sf->ingroup; 1018 1019 if (sf->outgroup == MPI_GROUP_NULL) { 1020 ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr); 1021 ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr); 1022 ierr = MPI_Group_free(&group);CHKERRQ(ierr); 1023 } 1024 *outgoing = sf->outgroup; 1025 PetscFunctionReturn(0); 1026 } 1027 1028 /*@ 1029 PetscSFGetMultiSF - gets the inner SF implemeting gathers and scatters 1030 1031 Collective 1032 1033 Input Argument: 1034 . sf - star forest that may contain roots with 0 or with more than 1 vertex 1035 1036 Output Arguments: 1037 . multi - star forest with split roots, such that each root has degree exactly 1 1038 1039 Level: developer 1040 1041 Notes: 1042 1043 In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi 1044 directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming 1045 edge, it is a candidate for future optimization that might involve its removal. 
1046 1047 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering() 1048 @*/ 1049 PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi) 1050 { 1051 PetscErrorCode ierr; 1052 1053 PetscFunctionBegin; 1054 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1055 PetscValidPointer(multi,2); 1056 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */ 1057 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1058 *multi = sf->multi; 1059 PetscFunctionReturn(0); 1060 } 1061 if (!sf->multi) { 1062 const PetscInt *indegree; 1063 PetscInt i,*inoffset,*outones,*outoffset,maxlocal; 1064 PetscSFNode *remote; 1065 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 1066 ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr); 1067 ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr); 1068 ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr); 1069 inoffset[0] = 0; 1070 for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i]; 1071 for (i=0; i<maxlocal; i++) outones[i] = 1; 1072 ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1073 ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1074 for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1075 #if 0 1076 #if defined(PETSC_USE_DEBUG) /* Check that the expected number of increments occurred */ 1077 for (i=0; i<sf->nroots; i++) { 1078 if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp"); 1079 } 1080 #endif 1081 #endif 1082 ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr); 1083 for (i=0; i<sf->nleaves; i++) { 1084 remote[i].rank = sf->remote[i].rank; 1085 remote[i].index = outoffset[sf->mine ? 
sf->mine[i] : i]; 1086 } 1087 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1088 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1089 if (sf->rankorder) { /* Sort the ranks */ 1090 PetscMPIInt rank; 1091 PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree; 1092 PetscSFNode *newremote; 1093 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 1094 for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]); 1095 ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr); 1096 for (i=0; i<maxlocal; i++) outranks[i] = rank; 1097 ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr); 1098 ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr); 1099 /* Sort the incoming ranks at each vertex, build the inverse map */ 1100 for (i=0; i<sf->nroots; i++) { 1101 PetscInt j; 1102 for (j=0; j<indegree[i]; j++) tmpoffset[j] = j; 1103 ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr); 1104 for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1105 } 1106 ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr); 1107 ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr); 1108 ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr); 1109 for (i=0; i<sf->nleaves; i++) { 1110 newremote[i].rank = sf->remote[i].rank; 1111 newremote[i].index = newoutoffset[sf->mine ? 
sf->mine[i] : i]; 1112 } 1113 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1114 ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr); 1115 } 1116 ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr); 1117 } 1118 *multi = sf->multi; 1119 PetscFunctionReturn(0); 1120 } 1121 1122 /*@C 1123 PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices 1124 1125 Collective 1126 1127 Input Arguments: 1128 + sf - original star forest 1129 . nselected - number of selected roots on this process 1130 - selected - indices of the selected roots on this process 1131 1132 Output Arguments: 1133 . newsf - new star forest 1134 1135 Level: advanced 1136 1137 Note: 1138 To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can 1139 be done by calling PetscSFGetGraph(). 1140 1141 .seealso: PetscSFSetGraph(), PetscSFGetGraph() 1142 @*/ 1143 PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1144 { 1145 PetscInt i,n,*roots,*rootdata,*leafdata,nroots,nleaves,connected_leaves,*new_ilocal; 1146 const PetscSFNode *iremote; 1147 PetscSFNode *new_iremote; 1148 PetscSF tmpsf; 1149 MPI_Comm comm; 1150 PetscErrorCode ierr; 1151 1152 PetscFunctionBegin; 1153 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1154 PetscSFCheckGraphSet(sf,1); 1155 if (nselected) PetscValidPointer(selected,3); 1156 PetscValidPointer(newsf,4); 1157 1158 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1159 1160 /* Uniq selected[] and put results in roots[] */ 1161 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1162 ierr = PetscMalloc1(nselected,&roots);CHKERRQ(ierr); 1163 ierr = PetscArraycpy(roots,selected,nselected);CHKERRQ(ierr); 1164 ierr = PetscSortedRemoveDupsInt(&nselected,roots);CHKERRQ(ierr); 1165 if (nselected && (roots[0] < 0 
|| roots[nselected-1] >= sf->nroots)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %D/%D are not in [0,%D)",roots[0],roots[nselected-1],sf->nroots); 1166 1167 if (sf->ops->CreateEmbeddedSF) { 1168 ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,roots,newsf);CHKERRQ(ierr); 1169 } else { 1170 /* A generic version of creating embedded sf. Note that we called PetscSFSetGraph() twice, which is certainly expensive */ 1171 /* Find out which leaves (not leaf data items) are still connected to roots in the embedded sf */ 1172 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,NULL,&iremote);CHKERRQ(ierr); 1173 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&tmpsf);CHKERRQ(ierr); 1174 ierr = PetscSFSetGraph(tmpsf,nroots,nleaves,NULL/*contiguous*/,PETSC_USE_POINTER,iremote,PETSC_USE_POINTER);CHKERRQ(ierr); 1175 ierr = PetscCalloc2(nroots,&rootdata,nleaves,&leafdata);CHKERRQ(ierr); 1176 for (i=0; i<nselected; ++i) rootdata[roots[i]] = 1; 1177 ierr = PetscSFBcastBegin(tmpsf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr); 1178 ierr = PetscSFBcastEnd(tmpsf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr); 1179 ierr = PetscSFDestroy(&tmpsf);CHKERRQ(ierr); 1180 1181 /* Build newsf with leaves that are still connected */ 1182 connected_leaves = 0; 1183 for (i=0; i<nleaves; ++i) connected_leaves += leafdata[i]; 1184 ierr = PetscMalloc1(connected_leaves,&new_ilocal);CHKERRQ(ierr); 1185 ierr = PetscMalloc1(connected_leaves,&new_iremote);CHKERRQ(ierr); 1186 for (i=0, n=0; i<nleaves; ++i) { 1187 if (leafdata[i]) { 1188 new_ilocal[n] = sf->mine ? 
sf->mine[i] : i; 1189 new_iremote[n].rank = sf->remote[i].rank; 1190 new_iremote[n].index = sf->remote[i].index; 1191 ++n; 1192 } 1193 } 1194 1195 if (n != connected_leaves) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"There is a size mismatch in the SF embedding, %d != %d",n,connected_leaves); 1196 ierr = PetscSFCreate(comm,newsf);CHKERRQ(ierr); 1197 ierr = PetscSFSetFromOptions(*newsf);CHKERRQ(ierr); 1198 ierr = PetscSFSetGraph(*newsf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1199 ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr); 1200 } 1201 ierr = PetscFree(roots);CHKERRQ(ierr); 1202 PetscFunctionReturn(0); 1203 } 1204 1205 /*@C 1206 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1207 1208 Collective 1209 1210 Input Arguments: 1211 + sf - original star forest 1212 . nselected - number of selected leaves on this process 1213 - selected - indices of the selected leaves on this process 1214 1215 Output Arguments: 1216 . 
newsf - new star forest 1217 1218 Level: advanced 1219 1220 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph() 1221 @*/ 1222 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1223 { 1224 const PetscSFNode *iremote; 1225 PetscSFNode *new_iremote; 1226 const PetscInt *ilocal; 1227 PetscInt i,nroots,*leaves,*new_ilocal; 1228 MPI_Comm comm; 1229 PetscErrorCode ierr; 1230 1231 PetscFunctionBegin; 1232 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1233 PetscSFCheckGraphSet(sf,1); 1234 if (nselected) PetscValidPointer(selected,3); 1235 PetscValidPointer(newsf,4); 1236 1237 /* Uniq selected[] and put results in leaves[] */ 1238 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1239 ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr); 1240 ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr); 1241 ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr); 1242 if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves); 1243 1244 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1245 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) { 1246 ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr); 1247 } else { 1248 ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr); 1249 ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr); 1250 ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr); 1251 for (i=0; i<nselected; ++i) { 1252 const PetscInt l = leaves[i]; 1253 new_ilocal[i] = ilocal ? 
ilocal[l] : l; 1254 new_iremote[i].rank = iremote[l].rank; 1255 new_iremote[i].index = iremote[l].index; 1256 } 1257 ierr = PetscSFCreate(comm,newsf);CHKERRQ(ierr); 1258 ierr = PetscSFSetFromOptions(*newsf);CHKERRQ(ierr); 1259 ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1260 } 1261 ierr = PetscFree(leaves);CHKERRQ(ierr); 1262 PetscFunctionReturn(0); 1263 } 1264 1265 /*@C 1266 PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd() 1267 1268 Collective on PetscSF 1269 1270 Input Arguments: 1271 + sf - star forest on which to communicate 1272 . unit - data type associated with each node 1273 . rootdata - buffer to broadcast 1274 - op - operation to use for reduction 1275 1276 Output Arguments: 1277 . leafdata - buffer to be reduced with values from each leaf's respective root 1278 1279 Level: intermediate 1280 1281 .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd() 1282 @*/ 1283 PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1284 { 1285 PetscErrorCode ierr; 1286 PetscMemType rootmtype,leafmtype; 1287 1288 PetscFunctionBegin; 1289 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1290 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1291 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1292 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1293 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1294 #if defined(PETSC_HAVE_CUDA) 1295 /* Shall we assume rootdata, leafdata are ready to use (instead of being computed by some asynchronous kernels)? 1296 To be similar to MPI, I'd like to have this assumption, since MPI does not have a concept of stream. 1297 But currently this assumption is not enforecd in Petsc. To be safe, I do synchronization here. 
Otherwise, if 1298 we do not sync now and call the Pack kernel directly on the default NULL stream (assume petsc objects are also 1299 computed on it), we have to sync the NULL stream before calling MPI routines. So, it looks a cudaDeviceSynchronize 1300 is inevitable. We do it now and put pack/unpack kernels to non-NULL streams. 1301 */ 1302 if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);} 1303 #endif 1304 ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 1305 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1306 PetscFunctionReturn(0); 1307 } 1308 1309 /*@C 1310 PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin() 1311 1312 Collective 1313 1314 Input Arguments: 1315 + sf - star forest 1316 . unit - data type 1317 . rootdata - buffer to broadcast 1318 - op - operation to use for reduction 1319 1320 Output Arguments: 1321 . 
leafdata - buffer to be reduced with values from each leaf's respective root 1322 1323 Level: intermediate 1324 1325 .seealso: PetscSFSetGraph(), PetscSFReduceEnd() 1326 @*/ 1327 PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1328 { 1329 PetscErrorCode ierr; 1330 PetscMemType rootmtype,leafmtype; 1331 1332 PetscFunctionBegin; 1333 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1334 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1335 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1336 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1337 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1338 ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 1339 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1340 PetscFunctionReturn(0); 1341 } 1342 1343 /*@C 1344 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 1345 1346 Collective 1347 1348 Input Arguments: 1349 + sf - star forest 1350 . unit - data type 1351 . leafdata - values to reduce 1352 - op - reduction operation 1353 1354 Output Arguments: 1355 . 
rootdata - result of reduction of values from all leaves of each root 1356 1357 Level: intermediate 1358 1359 .seealso: PetscSFBcastBegin() 1360 @*/ 1361 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1362 { 1363 PetscErrorCode ierr; 1364 PetscMemType rootmtype,leafmtype; 1365 1366 PetscFunctionBegin; 1367 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1368 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1369 ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1370 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1371 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1372 #if defined(PETSC_HAVE_CUDA) 1373 if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);} 1374 #endif 1375 ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr); 1376 ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1377 PetscFunctionReturn(0); 1378 } 1379 1380 /*@C 1381 PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin() 1382 1383 Collective 1384 1385 Input Arguments: 1386 + sf - star forest 1387 . unit - data type 1388 . leafdata - values to reduce 1389 - op - reduction operation 1390 1391 Output Arguments: 1392 . 
rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rmtype,lmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
  /* Both memory types are needed so the implementation can pick host or device unpack paths */
  ierr = PetscGetMemType(rootdata,&rmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&lmtype);CHKERRQ(ierr);
  ierr = (*sf->ops->ReduceEnd)(sf,unit,lmtype,leafdata,rmtype,rootdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Arguments:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
   integers.

.seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype,leafupdatemtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
  /* leafdata and leafupdate travel through a single pack/unpack path, hence must share a memory space */
  if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
#if defined(PETSC_HAVE_CUDA)
  /* Make sure device buffers are ready before communication; see the discussion in PetscSFBcastAndOpBegin() */
  if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);}
#endif
  ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.
leafdata - leaf values to use in reduction 1469 - op - operation to use for reduction 1470 1471 Output Arguments: 1472 + rootdata - root values to be updated, input state is seen by first process to perform an update 1473 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1474 1475 Level: advanced 1476 1477 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph() 1478 @*/ 1479 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1480 { 1481 PetscErrorCode ierr; 1482 PetscMemType rootmtype,leafmtype,leafupdatemtype; 1483 1484 PetscFunctionBegin; 1485 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1486 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1487 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1488 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1489 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1490 ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr); 1491 if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1492 ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr); 1493 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1494 PetscFunctionReturn(0); 1495 } 1496 1497 /*@C 1498 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd() 1499 1500 Collective 1501 1502 Input Arguments: 1503 . sf - star forest 1504 1505 Output Arguments: 1506 . degree - degree of each root vertex 1507 1508 Level: advanced 1509 1510 Notes: 1511 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 

.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal = sf->maxleaf+1;  /* TODO: We should use PetscSFGetLeafRange() */
    /* A non-NULL sf->degree here means a previous Begin is still outstanding */
    if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* Summing a one from every leaf yields the in-degree of each root */
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  /* Result is not available until PetscSFComputeDegreeEnd() completes the reduction */
  *degree = NULL;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFComputeDegreeBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    /* degreetmp is allocated by the matching Begin; its absence means Begin was never called */
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    /* Cache the result; subsequent Begin/End pairs return it without communication */
    sf->degreeknown = PETSC_TRUE;
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}


/*@C
   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
   Each multi-root is assigned index of the corresponding original root.

   Collective

   Input Arguments:
+  sf - star forest
-  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

   Output Arguments:
+  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
-  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

   Level: developer

   Notes:
   The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1591 1592 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF() 1593 @*/ 1594 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1595 { 1596 PetscSF msf; 1597 PetscInt i, j, k, nroots, nmroots; 1598 PetscErrorCode ierr; 1599 1600 PetscFunctionBegin; 1601 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1602 ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 1603 if (nroots) PetscValidIntPointer(degree,2); 1604 if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3); 1605 PetscValidPointer(multiRootsOrigNumbering,4); 1606 ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr); 1607 ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr); 1608 ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr); 1609 for (i=0,j=0,k=0; i<nroots; i++) { 1610 if (!degree[i]) continue; 1611 for (j=0; j<degree[i]; j++,k++) { 1612 (*multiRootsOrigNumbering)[k] = i; 1613 } 1614 } 1615 #if defined(PETSC_USE_DEBUG) 1616 if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail"); 1617 #endif 1618 if (nMultiRoots) *nMultiRoots = nmroots; 1619 PetscFunctionReturn(0); 1620 } 1621 1622 /*@C 1623 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 1624 1625 Collective 1626 1627 Input Arguments: 1628 + sf - star forest 1629 . unit - data type 1630 - leafdata - leaf data to gather to roots 1631 1632 Output Argument: 1633 . 
multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1634 1635 Level: intermediate 1636 1637 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1638 @*/ 1639 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1640 { 1641 PetscErrorCode ierr; 1642 PetscSF multi; 1643 1644 PetscFunctionBegin; 1645 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1646 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1647 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1648 ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 1649 PetscFunctionReturn(0); 1650 } 1651 1652 /*@C 1653 PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin() 1654 1655 Collective 1656 1657 Input Arguments: 1658 + sf - star forest 1659 . unit - data type 1660 - leafdata - leaf data to gather to roots 1661 1662 Output Argument: 1663 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1664 1665 Level: intermediate 1666 1667 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1668 @*/ 1669 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1670 { 1671 PetscErrorCode ierr; 1672 PetscSF multi; 1673 1674 PetscFunctionBegin; 1675 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1676 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1677 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1678 ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 1679 PetscFunctionReturn(0); 1680 } 1681 1682 /*@C 1683 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd() 1684 1685 Collective 1686 1687 Input Arguments: 1688 + sf - star forest 1689 . unit - data type 1690 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1691 1692 Output Argument: 1693 . 
leafdata - leaf data to be update with personal data from each respective root 1694 1695 Level: intermediate 1696 1697 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1698 @*/ 1699 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1700 { 1701 PetscErrorCode ierr; 1702 PetscSF multi; 1703 1704 PetscFunctionBegin; 1705 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1706 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1707 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1708 ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr); 1709 PetscFunctionReturn(0); 1710 } 1711 1712 /*@C 1713 PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin() 1714 1715 Collective 1716 1717 Input Arguments: 1718 + sf - star forest 1719 . unit - data type 1720 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1721 1722 Output Argument: 1723 . leafdata - leaf data to be update with personal data from each respective root 1724 1725 Level: intermediate 1726 1727 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1728 @*/ 1729 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1730 { 1731 PetscErrorCode ierr; 1732 PetscSF multi; 1733 1734 PetscFunctionBegin; 1735 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1736 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1737 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1738 ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr); 1739 PetscFunctionReturn(0); 1740 } 1741 1742 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 1743 { 1744 #if defined(PETSC_USE_DEBUG) 1745 PetscInt i, n, nleaves; 1746 const PetscInt *ilocal = NULL; 1747 PetscHSetI seen; 1748 PetscErrorCode ierr; 1749 1750 PetscFunctionBegin; 1751 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 1752 ierr = 
PetscHSetICreate(&seen);CHKERRQ(ierr); 1753 for (i = 0; i < nleaves; i++) { 1754 const PetscInt leaf = ilocal ? ilocal[i] : i; 1755 ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr); 1756 } 1757 ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr); 1758 if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique"); 1759 ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr); 1760 PetscFunctionReturn(0); 1761 #else 1762 PetscFunctionBegin; 1763 PetscFunctionReturn(0); 1764 #endif 1765 } 1766 /*@ 1767 PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view 1768 1769 Input Parameters: 1770 + sfA - The first PetscSF, whose local space may be a permutation, but can not be sparse. 1771 - sfB - The second PetscSF, whose number of roots must be equal to number of leaves of sfA on each processor 1772 1773 Output Parameters: 1774 . sfBA - The composite SF. Doing a Bcast on the new SF is equvalent to doing Bcast on sfA, then Bcast on sfB 1775 1776 Level: developer 1777 1778 Notes: 1779 For the resulting composed SF to be valid, the input SFs must be true star forests: the leaves must be unique. This is 1780 checked in debug mode. 

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  MPI_Comm          comm;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB,*localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscObjectGetComm((PetscObject)sfA,&comm);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  ierr = PetscSFGetLeafRange(sfA,&minleaf,&maxleaf);CHKERRQ(ierr);
  /* sfB's roots must coincide with sfA's leaves, and sfA's leaf space must be dense [0,numLeavesA) */
  if (numRootsB != numLeavesA) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The second SF's number of roots must be equal to the first SF's number of leaves");
  if (maxleaf+1 != numLeavesA || minleaf) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The first SF can not have sparse local space");
  /* The above check is fast, but not sufficient, since we cannot guarantee that the SF has unique leaves. So in debug
     mode, check properly. */
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  if (localPointsA) {
    /* Local space is dense permutation of identity. Need to rewire order of the remote points:
       after this loop, entry j of reorderedRemotePointsA is the remote root of sfA leaf j
       (minleaf is 0 here by the check above, so the subtraction is a no-op kept for clarity). */
    ierr = PetscMalloc1(numLeavesA,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numLeavesA; i++) reorderedRemotePointsA[localPointsA[i]-minleaf] = remotePointsA[i];
    remotePointsA = reorderedRemotePointsA;
  }
  /* Broadcast each sfA root (a PetscSFNode, i.e. a pair of PetscInts, hence MPIU_2INT) down sfB;
     leafdataB is indexed by (leaf - minleaf), so shift the base pointer to let sfB address it by
     global leaf index.  NOTE(review): leafdataB-minleaf forms a pointer before the array start when
     minleaf > 0 -- an established PETSc idiom, but technically out-of-bounds pointer arithmetic. */
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* sfB's leaves must be unique, otherwise BcastAndOp(B o A) != BcastAndOp(B) o BcastAndOp(A) */
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);
  if (minleaf == 0 && maxleaf + 1 == numLeavesB) { /* Local space of sfB is an identity or permutation */
    /* leafdataB already holds one remote point per leaf in order; hand its ownership to the new SF */
    localPointsBA  = NULL;
    remotePointsBA = leafdataB;
  } else {
    /* Sparse leaf space: compact leafdataB into leaf order, then release the scratch buffer */
    localPointsBA  = localPointsB;
    ierr = PetscMalloc1(numLeavesB,&remotePointsBA);CHKERRQ(ierr);
    for (i=0; i<numLeavesB; i++) remotePointsBA[i] = leafdataB[localPointsB[i]-minleaf];
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(comm, sfBA);CHKERRQ(ierr);
  /* remotePointsBA is handed over (PETSC_OWN_POINTER); localPointsBA is copied since it may alias sfB's graph */
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesB,localPointsBA,PETSC_COPY_VALUES,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  PetscSFComposeInverse - Compose a new PetscSF by putting inverse of the second SF under the first one

  Input Parameters:
+ sfA - The first PetscSF, whose local space may be a permutation, but can not be sparse.
- sfB - The second PetscSF, whose number of leaves must be equal to number of leaves of sfA on each processor. All roots must have degree 1.

  Output Parameters:
. sfBA - The composite SF.
Doing a Bcast on the new SF is equivalent to doing Bcast on sfA, then Bcast on inverse of sfB

  Level: developer

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  MPI_Comm          comm;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscObjectGetComm((PetscObject) sfA, &comm);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);
  ierr = PetscSFGetLeafRange(sfA,&minleaf,&maxleaf);CHKERRQ(ierr);
  if (maxleaf+1-minleaf != numLeavesA) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The first SF can not have sparse local space");
  if (numLeavesA != numLeavesB) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The second SF's number of leaves must be equal to the first SF's number of leaves");
  /* Push each sfA remote root up through sfB with MPIU_REPLACE: the i-th leaf of sfB deposits
     remotePointsA[i] (a PetscSFNode, i.e. two PetscInts, hence MPIU_2INT) at its sfB root.
     NOTE(review): remotePointsBA comes from PetscMalloc1, so any sfB root of degree 0 would keep
     an uninitialized entry -- this relies on the documented requirement that all roots have degree 1.
     NOTE(review): localPointsA is fetched but never used to reorder remotePointsA; this appears to
     assume sfA's and sfB's leaves are enumerated in the same order -- confirm against callers. */
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,remotePointsA,remotePointsBA,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,remotePointsA,remotePointsBA,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFCreate(comm,sfBA);CHKERRQ(ierr);
  /* remotePointsBA ownership transfers to the new SF; its leaf space is the dense identity (NULL ilocal) */
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numRootsB,NULL,PETSC_COPY_VALUES,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has
intra-process edges of the global PetscSF 1881 1882 Input Parameters: 1883 . sf - The global PetscSF 1884 1885 Output Parameters: 1886 . out - The local PetscSF 1887 */ 1888 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out) 1889 { 1890 MPI_Comm comm; 1891 PetscMPIInt myrank; 1892 const PetscInt *ilocal; 1893 const PetscSFNode *iremote; 1894 PetscInt i,j,nroots,nleaves,lnleaves,*lilocal; 1895 PetscSFNode *liremote; 1896 PetscSF lsf; 1897 PetscErrorCode ierr; 1898 1899 PetscFunctionBegin; 1900 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1901 if (sf->ops->CreateLocalSF) { 1902 ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr); 1903 } else { 1904 /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */ 1905 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1906 ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr); 1907 1908 /* Find out local edges and build a local SF */ 1909 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 1910 for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;} 1911 ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr); 1912 ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr); 1913 1914 for (i=j=0; i<nleaves; i++) { 1915 if (iremote[i].rank == (PetscInt)myrank) { 1916 lilocal[j] = ilocal? 
ilocal[i] : i; /* ilocal=NULL for contiguous storage */ 1917 liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */ 1918 liremote[j].index = iremote[i].index; 1919 j++; 1920 } 1921 } 1922 ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr); 1923 ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1924 ierr = PetscSFSetUp(lsf);CHKERRQ(ierr); 1925 *out = lsf; 1926 } 1927 PetscFunctionReturn(0); 1928 } 1929 1930 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */ 1931 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata) 1932 { 1933 PetscErrorCode ierr; 1934 PetscMemType rootmtype,leafmtype; 1935 1936 PetscFunctionBegin; 1937 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1938 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1939 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1940 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1941 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1942 if (sf->ops->BcastToZero) { 1943 ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr); 1944 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type"); 1945 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1946 PetscFunctionReturn(0); 1947 } 1948 1949