#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petscctable.h>

#if defined(PETSC_USE_DEBUG)
/* In debug builds, error out if the SF graph has not been set before use. */
#  define PetscSFCheckGraphSet(sf,arg) do {                                                                      \
    if (PetscUnlikely(!(sf)->graphset))                                                                          \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif

const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0};

/*@
   PetscSFCreate - create a star forest communication context

   Collective

   Input Arguments:
.  comm - communicator on which the star forest will operate

   Output Arguments:
.  sf - new star forest context

   Options Database Keys:
+  -sf_type basic    - Use MPI persistent Isend/Irecv for communication (Default)
.  -sf_type window   - Use MPI-3 one-sided window for communication
-  -sf_type neighbor - Use MPI-3 neighborhood collectives for communication

   Level: intermediate

   Notes:
   When one knows the communication graph is one of the predefined graphs, such as MPI_Alltoall, MPI_Allgatherv,
   MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
   SFs are optimized and they have better performance than general SFs.

.seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
{
  PetscErrorCode ierr;
  PetscSF        b;

  PetscFunctionBegin;
  PetscValidPointer(sf,2);
  ierr = PetscSFInitializePackage();CHKERRQ(ierr);

  ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);

  /* Sentinel values mark the graph and rank data as not-yet-set */
  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_MAX_INT;
  b->maxleaf   = PETSC_MIN_INT;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;

  *sf = b;
  PetscFunctionReturn(0);
}

/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Arguments:
.  sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the implementation release its own resources first */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFSetType - Set the PetscSF communication implementation

   Collective on PetscSF

   Input Parameters:
+  sf - the PetscSF context
-  type - a known method

   Options Database Key:
.  -sf_type <type> - Sets the method; use -help for a list
   of available methods (for instance, window, pt2pt, neighbor)

   Notes:
   See "include/petscsf.h" for available methods (for instance)
+    PETSCSFWINDOW - MPI-2/3 one-sided
-    PETSCSFBASIC  - basic implementation using MPI-1 two-sided

  Level: intermediate

.seealso: PetscSFType, PetscSFCreate()
@*/
PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
{
  PetscErrorCode ierr,(*r)(PetscSF);
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidCharPointer(type,2);

  /* No-op if the SF already has the requested type */
  ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
  if (match) PetscFunctionReturn(0);

  ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
  if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
  /* Destroy the previous PetscSF implementation context */
  if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
  ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
  ierr = (*r)(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetType - Get the PetscSF communication implementation

   Not Collective

   Input Parameter:
.  sf - the PetscSF context

   Output Parameter:
.  type - the PetscSF type name

   Level: intermediate

.seealso: PetscSFSetType(), PetscSFCreate()
@*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
  PetscValidPointer(type,2);
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}

/*@
   PetscSFDestroy - destroy star forest

   Collective

   Input Arguments:
.  sf - address of star forest

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFReset()
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Only the last reference actually destroys the object */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetUp - set up communication structures

   Collective

   Input Arguments:
.  sf - star forest communication object

   Level: beginner

.seealso: PetscSFSetFromOptions(), PetscSFSetType()
@*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (sf->setupcalled) PetscFunctionReturn(0);
  /* Fall back to the default (basic) implementation if no type was chosen */
  if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);}
  ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
  ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetFromOptions - set PetscSF options using the options database

   Logically Collective

   Input Arguments:
.  sf - star forest

   Options Database Keys:
+  -sf_type       - implementation type, see PetscSFSetType()
-  -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise

   Level: intermediate

.seealso: PetscSFWindowSetSyncType()
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,flg ?
                         type : deft);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
  /* Give the implementation a chance to process its own options */
  if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

   Logically Collective

   Input Arguments:
+  sf - star forest
-  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

   Level: advanced

.seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  /* The multi-SF encodes the ordering, so it must not exist yet */
  if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Arguments:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
   In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode

   Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
   encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
   needed

.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
  if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

  /* Discard any previously set graph before installing the new one */
  ierr = PetscSFReset(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  if (nleaves && ilocal) {
    PetscInt i;
    PetscInt minleaf = PETSC_MAX_INT;
    PetscInt maxleaf = PETSC_MIN_INT;
    int      contiguous = 1;
    /* One pass computes the leaf range and detects the identity permutation */
    for (i=0; i<nleaves; i++) {
      minleaf = PetscMin(minleaf,ilocal[i]);
      maxleaf = PetscMax(maxleaf,ilocal[i]);
      contiguous &= (ilocal[i] == i);
    }
    sf->minleaf = minleaf;
    sf->maxleaf = maxleaf;
    if (contiguous) {
      /* Identity ilocal encodes contiguous storage; drop it (and free if we own it) */
      if (localmode == PETSC_OWN_POINTER) {
        ierr = PetscFree(ilocal);CHKERRQ(ierr);
      }
      ilocal = NULL;
    }
  } else {
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
  }

  if (ilocal) {
    switch (localmode) {
    case
         PETSC_COPY_VALUES:
      ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
      sf->mine = sf->mine_alloc;
      break;
    case PETSC_OWN_POINTER:
      /* Take ownership of the caller's array; PetscSFReset() will free it */
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      /* Borrow the caller's array; caller keeps ownership */
      sf->mine_alloc = NULL;
      sf->mine       = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }

  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    sf->remote_alloc = NULL;
    sf->remote       = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
  PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

  Collective

  Input Parameters:
+ sf      - The PetscSF
. map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
- pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

  Notes:
  It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
  n and N are local and global sizes of x respectively.

  With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
  sequential vectors y on all ranks.

  With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
  sequential vector y on rank 0.

  In above cases, entries of x are roots and entries of y are leaves.

  With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
  creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
  of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
  not mean one can not send multiple items. One just needs to create a new MPI datatype for the multiple data
  items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

  In this case, roots and leaves are symmetric.

  Level: intermediate
@*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* The caller's map is ignored; build a size-per-rank layout instead */
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it
     */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots  = n;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves in the gather pattern */
    sf->nleaves = rank ? 0 : N;
    sf->nroots  = n;
    sf->nranks  = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves = size;
    sf->nroots  = size;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

   Collective

   Input Arguments:

.  sf - star forest to invert

   Output Arguments:
.  isf - inverse of sf
   Level: advanced

   Notes:
   All roots must have degree 1.

   The local space may be a permutation, but cannot be sparse.

.seealso: PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  /* Each leaf identifies itself by (rank,index) ... */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* ... and roots start as "unreferenced" sentinels */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  /* After the reduce, each root knows the (rank,index) of the leaf that references it */
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL;
  else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Arguments:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Arguments:
.  newsf - new communication object

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      ierr =
             PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
  /* Let the implementation copy its own data */
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGraph - Get the graph specifying a parallel star forest

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers
-  iremote - remote locations of root vertices for each leaf on the current process

   Notes:
   We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->GetGraph) {
    ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
  } else {
    /* Every output is optional; NULL means the caller is not interested */
    if (nroots)  *nroots  = sf->nroots;
    if (nleaves) *nleaves = sf->nleaves;
    if (ilocal)  *ilocal  = sf->mine;
    if (iremote) *iremote = sf->remote;
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetLeafRange - Get the active leaf ranges

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
-  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.

   Level: developer

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
{

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFView - view a star forest

   Collective

   Input Arguments:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      ierr =
             PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        /* Sort a copy of the rank list so output is in ascending rank order */
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
.  roffset - offset in rmine/rremote for each rank (length nranks+1)
.  rmine - concatenated array holding local indices referencing each remote rank
-  rremote - concatenated array holding remote indices referenced for each remote rank

   Level: developer

.seealso: PetscSFGetLeafRanks()
@*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetRootRanks) {
    ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
  } else {
    /* The generic implementation */
    if (nranks)  *nranks  = sf->nranks;
    if (ranks)   *ranks   = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine)   *rmine   = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  niranks - number of leaf ranks referencing roots on this process
.  iranks - array of ranks
.  ioffset - offset in irootloc for each rank (length niranks+1)
-  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetLeafRanks) {
    ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
  } else {
    /* No generic fallback exists; the implementation must provide this */
    PetscSFType type;
    ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
  }
  PetscFunctionReturn(0);
}

/* Linear membership test: is needle one of the n entries of list? */
static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
  PetscInt i;
  for (i=0; i<n; i++) {
    if (needle == list[i]) return PETSC_TRUE;
  }
  return PETSC_FALSE;
}

/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

   Collective

   Input Arguments:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  /* Count leaves per remote rank using a hash table */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--;             /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group   group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate the distinguished group's ranks into this communicator's numbering */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i; ) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for ( ; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) {      /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition independently so lookups can binary-search within it */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0; /* reused below as a per-rank fill cursor */
  }
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Argument:
.  sf - star forest

   Output Arguments:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining groups");
  /* Both groups are computed lazily and cached on the SF */
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);

    /* Enumerate the incoming ranks */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* The outgoing group is simply the ranks this process references */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Argument:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Arguments:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.

.seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
    ierr   = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    *multi = sf->multi;
    PetscFunctionReturn(0);
  }
  /* The multi-SF is computed lazily and cached; destroyed in PetscSFReset() */
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    /* inoffset[] is the prefix sum of root degrees: the first multi-root index owned by
       each original root.  The fetch-and-add below hands out one distinct multi-root
       index (outoffset[]) to every leaf pointing at that root. */
    ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
#if 0
#if defined(PETSC_USE_DEBUG)                                 /* Check that the expected number of increments occurred */
    for (i=0; i<sf->nroots; i++) {
      if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
    }
#endif
#endif
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) {        /* Sort the ranks */
      /* Reassign multi-root indices so that, within each root, incoming edges are
         ordered by sending rank; requires a reduce (collect ranks), a local sort,
         and a bcast (publish the permuted indices back to the leaves) */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
    }
    ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices

   Collective

   Input Arguments:
+  sf - original star forest
.  nselected  - number of selected roots on this process
-  selected   - indices of the selected roots on this process

   Output Arguments:
.  newsf - new star forest

   Level: advanced

   Note:
   To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
   be done by calling PetscSFGetGraph().

.seealso: PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  PetscInt          i,n,*roots,*rootdata,*leafdata,nroots,nleaves,connected_leaves,*new_ilocal;
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  PetscSF           tmpsf;
  MPI_Comm          comm;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (nselected) PetscValidPointer(selected,3);
  PetscValidPointer(newsf,4);

  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  /* Uniq selected[] and put results in roots[] */
  /* NOTE(review): PetscSortedRemoveDupsInt() requires its input to be sorted already;
     confirm that all callers pass selected[] sorted, otherwise PetscSortRemoveDupsInt()
     would be needed here (the min/max range check below also assumes sortedness) */
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscMalloc1(nselected,&roots);CHKERRQ(ierr);
  ierr = PetscArraycpy(roots,selected,nselected);CHKERRQ(ierr);
  ierr = PetscSortedRemoveDupsInt(&nselected,roots);CHKERRQ(ierr);
  if (nselected && (roots[0] < 0 || roots[nselected-1] >= sf->nroots)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %D/%D are not in [0,%D)",roots[0],roots[nselected-1],sf->nroots);

  if (sf->ops->CreateEmbeddedSF) {
    ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,roots,newsf);CHKERRQ(ierr);
  } else {
    /* A generic version of creating embedded sf. Note that we called PetscSFSetGraph() twice, which is certainly expensive */
    /* Find out which leaves (not leaf data items) are still connected to roots in the embedded sf:
       broadcast a 0/1 marker from the selected roots down to all leaves */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,NULL,&iremote);CHKERRQ(ierr);
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&tmpsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(tmpsf,nroots,nleaves,NULL/*contiguous*/,PETSC_USE_POINTER,iremote,PETSC_USE_POINTER);CHKERRQ(ierr);
    ierr = PetscCalloc2(nroots,&rootdata,nleaves,&leafdata);CHKERRQ(ierr);
    for (i=0; i<nselected; ++i) rootdata[roots[i]] = 1;
    ierr = PetscSFBcastBegin(tmpsf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(tmpsf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&tmpsf);CHKERRQ(ierr);

    /* Build newsf with leaves that are still connected */
    connected_leaves = 0;
    for (i=0; i<nleaves; ++i) connected_leaves += leafdata[i];
    ierr = PetscMalloc1(connected_leaves,&new_ilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(connected_leaves,&new_iremote);CHKERRQ(ierr);
    for (i=0, n=0; i<nleaves; ++i) {
      if (leafdata[i]) {
        new_ilocal[n]        = sf->mine ? sf->mine[i] : i;
        new_iremote[n].rank  = sf->remote[i].rank;
        new_iremote[n].index = sf->remote[i].index;
        ++n;
      }
    }

    if (n != connected_leaves) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"There is a size mismatch in the SF embedding, %d != %d",n,connected_leaves);
    ierr = PetscSFCreate(comm,newsf);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(*newsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(*newsf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
  }
  ierr = PetscFree(roots);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices

   Collective

   Input Arguments:
+  sf - original star forest
.  nselected  - number of selected leaves on this process
-  selected   - indices of the selected leaves on this process

   Output Arguments:
.
newsf - new star forest

   Level: advanced

.seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  const PetscInt    *ilocal;
  PetscInt          i,nroots,*leaves,*new_ilocal;
  MPI_Comm          comm;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (nselected) PetscValidPointer(selected,3);
  PetscValidPointer(newsf,4);

  /* Uniq selected[] and put results in leaves[] */
  /* NOTE(review): PetscSortedRemoveDupsInt() assumes leaves[] is already sorted;
     confirm callers pass sorted selected[] (the min/max range check below relies on it) */
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr);
  ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr);
  ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr);
  if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);

  /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
  if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
    ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr);
  } else {
    /* Generic fallback: keep only the selected leaves, roots are unchanged */
    ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
    ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
    for (i=0; i<nselected; ++i) {
      const PetscInt l     = leaves[i];
      new_ilocal[i]        = ilocal ? ilocal[l] : l;
      new_iremote[i].rank  = iremote[l].rank;
      new_iremote[i].index = iremote[l].index;
    }
    ierr = PetscSFCreate(comm,newsf);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(*newsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  }
  ierr = PetscFree(leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd()

   Collective on PetscSF

   Input Arguments:
+  sf - star forest on which to communicate
.  unit - data type associated with each node
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Arguments:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Dispatch to the implementation (basic/window/neighbor) selected at setup time */
  ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Arguments:
.
leafdata - buffer to be reduced with values from each leaf's respective root 1271 1272 Level: intermediate 1273 1274 .seealso: PetscSFSetGraph(), PetscSFReduceEnd() 1275 @*/ 1276 PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1277 { 1278 PetscErrorCode ierr; 1279 1280 PetscFunctionBegin; 1281 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1282 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1283 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1284 ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr); 1285 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1286 PetscFunctionReturn(0); 1287 } 1288 1289 /*@C 1290 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 1291 1292 Collective 1293 1294 Input Arguments: 1295 + sf - star forest 1296 . unit - data type 1297 . leafdata - values to reduce 1298 - op - reduction operation 1299 1300 Output Arguments: 1301 . rootdata - result of reduction of values from all leaves of each root 1302 1303 Level: intermediate 1304 1305 .seealso: PetscSFBcastBegin() 1306 @*/ 1307 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1308 { 1309 PetscErrorCode ierr; 1310 1311 PetscFunctionBegin; 1312 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1313 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1314 ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1315 ierr = (sf->ops->ReduceBegin)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr); 1316 ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1317 PetscFunctionReturn(0); 1318 } 1319 1320 /*@C 1321 PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin() 1322 1323 Collective 1324 1325 Input Arguments: 1326 + sf - star forest 1327 . unit - data type 1328 . 
leafdata - values to reduce 1329 - op - reduction operation 1330 1331 Output Arguments: 1332 . rootdata - result of reduction of values from all leaves of each root 1333 1334 Level: intermediate 1335 1336 .seealso: PetscSFSetGraph(), PetscSFBcastEnd() 1337 @*/ 1338 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1339 { 1340 PetscErrorCode ierr; 1341 1342 PetscFunctionBegin; 1343 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1344 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1345 ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr); 1346 ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr); 1347 ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr); 1348 PetscFunctionReturn(0); 1349 } 1350 1351 /*@C 1352 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd() 1353 1354 Collective 1355 1356 Input Arguments: 1357 . sf - star forest 1358 1359 Output Arguments: 1360 . degree - degree of each root vertex 1361 1362 Level: advanced 1363 1364 Notes: 1365 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 
1366 1367 .seealso: PetscSFGatherBegin() 1368 @*/ 1369 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree) 1370 { 1371 PetscErrorCode ierr; 1372 1373 PetscFunctionBegin; 1374 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1375 PetscSFCheckGraphSet(sf,1); 1376 PetscValidPointer(degree,2); 1377 if (!sf->degreeknown) { 1378 PetscInt i, nroots = sf->nroots, maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 1379 if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1380 ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr); 1381 ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 1382 for (i=0; i<nroots; i++) sf->degree[i] = 0; 1383 for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1; 1384 ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr); 1385 } 1386 *degree = NULL; 1387 PetscFunctionReturn(0); 1388 } 1389 1390 /*@C 1391 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin() 1392 1393 Collective 1394 1395 Input Arguments: 1396 . sf - star forest 1397 1398 Output Arguments: 1399 . degree - degree of each root vertex 1400 1401 Level: developer 1402 1403 Notes: 1404 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 

.seealso:
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    /* degreetmp is allocated by PetscSFComputeDegreeBegin(); its absence means Begin was never called */
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    sf->degreeknown = PETSC_TRUE; /* cached; subsequent Begin/End pairs are no-ops */
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}


/*@C
   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
   Each multi-root is assigned index of the corresponding original root.

   Collective

   Input Arguments:
+  sf - star forest
-  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

   Output Arguments:
+  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
-  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

   Level: developer

   Notes:
   The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.

.seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
@*/
PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
{
  PetscSF        msf;
  PetscInt       i, j, k, nroots, nmroots;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
  if (nroots) PetscValidIntPointer(degree,2);
  if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
  PetscValidPointer(multiRootsOrigNumbering,4);
  ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr);
  /* Expand each original root index i into degree[i] consecutive entries;
     k tracks the running multi-root index */
  for (i=0,j=0,k=0; i<nroots; i++) {
    if (!degree[i]) continue;
    for (j=0; j<degree[i]; j++,k++) {
      (*multiRootsOrigNumbering)[k] = i;
    }
  }
#if defined(PETSC_USE_DEBUG)
  /* Total expanded length must equal the multi-SF's root count */
  if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
#endif
  if (nMultiRoots) *nMultiRoots = nmroots;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Arguments:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   The update is only atomic at the granularity provided by the hardware.
Different roots referenced by the same process
   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
   integers.

.seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Dispatch to the implementation selected at setup time */
  ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Arguments:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Argument:
.  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Gather is implemented as a REPLACE-reduce on the multi-SF, where every
     multi-root has degree exactly one so no values collide */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Argument:
.  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Scatter is a broadcast on the multi-SF: each leaf receives its personal multi-root value */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

   Input Parameters:
+  sfA - The first PetscSF, whose local space may be a permutation, but can not be sparse.
-  sfB - The second PetscSF, whose number of roots must be equal to number of leaves of sfA on each processor

   Output Parameters:
.  sfBA - The composite SF. Doing a Bcast on the new SF is equivalent to doing Bcast on sfA, then Bcast on sfB

   Level: developer

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  MPI_Comm          comm;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscObjectGetComm((PetscObject)sfA,&comm);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  /* sfA's leaf space must be contiguous (possibly permuted) so that its leaves
     can serve one-for-one as the root space of sfB */
  ierr = PetscSFGetLeafRange(sfA,&minleaf,&maxleaf);CHKERRQ(ierr);
  if (maxleaf+1-minleaf != numLeavesA) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The first SF can not have sparse local space");
  if (numRootsB != numLeavesA) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The second SF's number of roots must be equal to the first SF's number of leaves");
  /* Broadcasting sfA's remote targets through sfB composes the two maps:
     each sfB leaf learns which sfA root it ultimately refers to */
  ierr = PetscMalloc1(numLeavesB,&remotePointsBA);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,remotePointsBA);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,remotePointsBA);CHKERRQ(ierr);
  ierr = PetscSFCreate(comm, sfBA);CHKERRQ(ierr);
  ierr =
PetscSFSetGraph(*sfBA,numRootsA,numLeavesB,localPointsB,PETSC_COPY_VALUES,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr); 1706 PetscFunctionReturn(0); 1707 } 1708 1709 /*@ 1710 PetscSFComposeInverse - Compose a new PetscSF by putting inverse of the second SF under the first one 1711 1712 Input Parameters: 1713 + sfA - The first PetscSF, whose local space may be a permutation, but can not be sparse. 1714 - sfB - The second PetscSF, whose number of leaves must be equal to number of leaves of sfA on each processor. All roots must have degree 1. 1715 1716 Output Parameters: 1717 . sfBA - The composite SF. Doing a Bcast on the new SF is equvalent to doing Bcast on sfA, then Bcast on inverse of sfB 1718 1719 Level: developer 1720 1721 .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph() 1722 @*/ 1723 PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA) 1724 { 1725 PetscErrorCode ierr; 1726 MPI_Comm comm; 1727 const PetscSFNode *remotePointsA,*remotePointsB; 1728 PetscSFNode *remotePointsBA; 1729 const PetscInt *localPointsA,*localPointsB; 1730 PetscInt numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf; 1731 1732 PetscFunctionBegin; 1733 PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1); 1734 PetscSFCheckGraphSet(sfA,1); 1735 PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2); 1736 PetscSFCheckGraphSet(sfB,2); 1737 PetscValidPointer(sfBA,3); 1738 ierr = PetscObjectGetComm((PetscObject) sfA, &comm);CHKERRQ(ierr); 1739 ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr); 1740 ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr); 1741 ierr = PetscSFGetLeafRange(sfA,&minleaf,&maxleaf);CHKERRQ(ierr); 1742 if (maxleaf+1-minleaf != numLeavesA) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The first SF can not have sparse local space"); 1743 if (numLeavesA != numLeavesB) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The second SF's number of leaves must be equal to 
the first SF's number of leaves"); 1744 ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr); 1745 ierr = PetscSFReduceBegin(sfB,MPIU_2INT,remotePointsA,remotePointsBA,MPIU_REPLACE);CHKERRQ(ierr); 1746 ierr = PetscSFReduceEnd(sfB,MPIU_2INT,remotePointsA,remotePointsBA,MPIU_REPLACE);CHKERRQ(ierr); 1747 ierr = PetscSFCreate(comm,sfBA);CHKERRQ(ierr); 1748 ierr = PetscSFSetGraph(*sfBA,numRootsA,numRootsB,NULL,PETSC_COPY_VALUES,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr); 1749 PetscFunctionReturn(0); 1750 } 1751 1752 /* 1753 PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF 1754 1755 Input Parameters: 1756 . sf - The global PetscSF 1757 1758 Output Parameters: 1759 . out - The local PetscSF 1760 */ 1761 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out) 1762 { 1763 MPI_Comm comm; 1764 PetscMPIInt myrank; 1765 const PetscInt *ilocal; 1766 const PetscSFNode *iremote; 1767 PetscInt i,j,nroots,nleaves,lnleaves,*lilocal; 1768 PetscSFNode *liremote; 1769 PetscSF lsf; 1770 PetscErrorCode ierr; 1771 1772 PetscFunctionBegin; 1773 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1774 if (sf->ops->CreateLocalSF) { 1775 ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr); 1776 } else { 1777 /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */ 1778 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1779 ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr); 1780 1781 /* Find out local edges and build a local SF */ 1782 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 1783 for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;} 1784 ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr); 1785 ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr); 1786 1787 for (i=j=0; i<nleaves; i++) { 1788 if (iremote[i].rank == (PetscInt)myrank) { 1789 lilocal[j] = ilocal? 
ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index; /* root index is unchanged: local SF shares sf's root numbering */
        j++;
      }
    }
    ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
    /* lsf takes ownership of lilocal and liremote (PETSC_OWN_POINTER): do not free them here */
    ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
    *out = lsf;
  }
  PetscFunctionReturn(0);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0.
   There is no generic fallback: SF implementations without a BcastToZero method raise PETSC_ERR_SUP. */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* NOTE(review): both the begin and end calls log the PETSCSF_BcastAndOpBegin event (there is no
     dedicated BcastToZero event) -- confirm this reuse is intended */
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  if (sf->ops->BcastToZero) {
    ierr = (*sf->ops->BcastToZero)(sf,unit,rootdata,leafdata);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}