#include <petsc-private/sfimpl.h> /*I "petscsf.h" I*/
#include <petscctable.h>

/* Logging support */
PetscLogEvent PETSCSF_SetGraph, PETSCSF_BcastBegin, PETSCSF_BcastEnd, PETSCSF_ReduceBegin, PETSCSF_ReduceEnd, PETSCSF_FetchAndOpBegin, PETSCSF_FetchAndOpEnd;

/* In debug builds, raise an error if a communication routine is invoked before
   PetscSFSetGraph() has been called on the object; compiles to a no-op otherwise. */
#if defined(PETSC_USE_DEBUG)
#  define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                              \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif

/* String names for PetscSFDuplicateOption, used by the options/enum machinery */
const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0};

#undef __FUNCT__
#define __FUNCT__ "PetscSFCreate"
/*@C
   PetscSFCreate - create a star forest communication context

   Not Collective

   Input Arguments:
.  comm - communicator on which the star forest will operate

   Output Arguments:
.  sf - new star forest context

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
{
  PetscErrorCode ierr;
  PetscSF        b;

  PetscFunctionBegin;
  PetscValidPointer(sf,2);
  ierr = PetscSFInitializePackage();CHKERRQ(ierr);

  ierr = PetscHeaderCreate(b,_p_PetscSF,struct _PetscSFOps,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);

  /* -1 marks the counts as "graph not set yet"; PetscSFGetGraph() passes these through */
  b->nroots    = -1;
  b->nleaves   = -1;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;      /* deterministic gathers by default */
  b->ingroup   = MPI_GROUP_NULL;  /* groups are built lazily in PetscSFGetGroups() */
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;

  *sf = b;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscSFReset"
/*@C
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Arguments:
.
sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Drop graph storage; the *_alloc members are non-NULL only when this object
     owns the memory (PETSC_COPY_VALUES / PETSC_OWN_POINTER modes). */
  sf->mine = NULL;
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  sf->remote = NULL;
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  /* Per-rank analysis built by PetscSFSetGraph() */
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  sf->graphset = PETSC_FALSE;
  /* Give the implementation a chance to release its own buffers */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscSFSetType"
/*@C
  PetscSFSetType - set the PetscSF communication implementation

  Collective on PetscSF

  Input Parameters:
+ sf - the PetscSF context
- type - a known method

  Options Database Key:
.
-sf_type <type> - Sets the method; use -help for a list
   of available methods (for instance, window, pt2pt, neighbor)

  Notes:
  See "include/petscsf.h" for available methods (for instance)
+    PETSCSFWINDOW - MPI-2/3 one-sided
-    PETSCSFBASIC - basic implementation using MPI-1 two-sided

  Level: intermediate

.keywords: PetscSF, set, type

.seealso: PetscSFType, PetscSFCreate()
@*/
PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
{
  PetscErrorCode ierr,(*r)(PetscSF);
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidCharPointer(type,2);

  /* Changing to the already-active type is a no-op */
  ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
  if (match) PetscFunctionReturn(0);

  ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
  if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
  /* Destroy the previous private PetscSF context */
  if (sf->ops->Destroy) {
    ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);
  }
  /* Clear the old vtable, then let the new type's constructor repopulate it */
  ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
  ierr = (*r)(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscSFDestroy"
/*@C
   PetscSFDestroy - destroy star forest

   Collective

   Input Arguments:
.
sf - address of star forest

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFReset()
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Only the final reference tears the object down; earlier callers just drop theirs */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = 0; PetscFunctionReturn(0);}
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);  /* also sets *sf to NULL */
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscSFSetUp"
/*@
   PetscSFSetUp - set up communication structures

   Collective

   Input Arguments:
.  sf - star forest communication object

   Level: beginner

.seealso: PetscSFSetFromOptions(), PetscSFSetType()
@*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (sf->setupcalled) PetscFunctionReturn(0);  /* idempotent */
  /* Fall back to the default implementation when no type was chosen explicitly */
  if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);}
  if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscSFSetFromOptions"
/*@C
   PetscSFSetFromOptions - set PetscSF options using the options database

   Logically Collective

   Input Arguments:
.  sf - star forest

   Options Database Keys:
.
-sf_synchronization - synchronization type used by PetscSF 209 210 Level: intermediate 211 212 .keywords: KSP, set, from, options, database 213 214 .seealso: PetscSFWindowSetSyncType() 215 @*/ 216 PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 217 { 218 PetscSFType deft; 219 char type[256]; 220 PetscErrorCode ierr; 221 PetscBool flg; 222 223 PetscFunctionBegin; 224 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 225 ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr); 226 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 227 ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,256,&flg);CHKERRQ(ierr); 228 ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr); 229 ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr); 230 if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(sf);CHKERRQ(ierr);} 231 ierr = PetscOptionsEnd();CHKERRQ(ierr); 232 PetscFunctionReturn(0); 233 } 234 235 #undef __FUNCT__ 236 #define __FUNCT__ "PetscSFSetRankOrder" 237 /*@C 238 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 239 240 Logically Collective 241 242 Input Arguments: 243 + sf - star forest 244 - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic) 245 246 Level: advanced 247 248 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin() 249 @*/ 250 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg) 251 { 252 253 PetscFunctionBegin; 254 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 255 PetscValidLogicalCollectiveBool(sf,flg,2); 256 if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 257 sf->rankorder = flg; 258 
PetscFunctionReturn(0); 259 } 260 261 #undef __FUNCT__ 262 #define __FUNCT__ "PetscSFSetGraph" 263 /*@C 264 PetscSFSetGraph - Set a parallel star forest 265 266 Collective 267 268 Input Arguments: 269 + sf - star forest 270 . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 271 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 272 . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage 273 . localmode - copy mode for ilocal 274 . iremote - remote locations of root vertices for each leaf on the current process 275 - remotemode - copy mode for iremote 276 277 Level: intermediate 278 279 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph() 280 @*/ 281 PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode) 282 { 283 PetscErrorCode ierr; 284 PetscTable table; 285 PetscTablePosition pos; 286 PetscMPIInt size; 287 PetscInt i,*rcount,*ranks; 288 289 PetscFunctionBegin; 290 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 291 ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 292 if (nleaves && ilocal) PetscValidIntPointer(ilocal,4); 293 if (nleaves) PetscValidPointer(iremote,6); 294 if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"roots %D, cannot be negative",nroots); 295 if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves); 296 ierr = PetscSFReset(sf);CHKERRQ(ierr); 297 sf->nroots = nroots; 298 sf->nleaves = nleaves; 299 if (ilocal) { 300 switch (localmode) { 301 case PETSC_COPY_VALUES: 302 ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr); 303 sf->mine = sf->mine_alloc; 304 ierr = PetscMemcpy(sf->mine,ilocal,nleaves*sizeof(*sf->mine));CHKERRQ(ierr); 305 sf->minleaf = PETSC_MAX_INT; 306 
sf->maxleaf = PETSC_MIN_INT; 307 for (i=0; i<nleaves; i++) { 308 sf->minleaf = PetscMin(sf->minleaf,ilocal[i]); 309 sf->maxleaf = PetscMax(sf->maxleaf,ilocal[i]); 310 } 311 break; 312 case PETSC_OWN_POINTER: 313 sf->mine_alloc = (PetscInt*)ilocal; 314 sf->mine = sf->mine_alloc; 315 break; 316 case PETSC_USE_POINTER: 317 sf->mine = (PetscInt*)ilocal; 318 break; 319 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode"); 320 } 321 } 322 if (!ilocal || nleaves > 0) { 323 sf->minleaf = 0; 324 sf->maxleaf = nleaves - 1; 325 } 326 switch (remotemode) { 327 case PETSC_COPY_VALUES: 328 ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr); 329 sf->remote = sf->remote_alloc; 330 ierr = PetscMemcpy(sf->remote,iremote,nleaves*sizeof(*sf->remote));CHKERRQ(ierr); 331 break; 332 case PETSC_OWN_POINTER: 333 sf->remote_alloc = (PetscSFNode*)iremote; 334 sf->remote = sf->remote_alloc; 335 break; 336 case PETSC_USE_POINTER: 337 sf->remote = (PetscSFNode*)iremote; 338 break; 339 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode"); 340 } 341 342 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr); 343 ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr); 344 for (i=0; i<nleaves; i++) { 345 /* Log 1-based rank */ 346 ierr = PetscTableAdd(table,iremote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr); 347 } 348 ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr); 349 ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,nleaves,&sf->rmine,nleaves,&sf->rremote);CHKERRQ(ierr); 350 ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr); 351 ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr); 352 for (i=0; i<sf->nranks; i++) { 353 ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr); 354 ranks[i]--; /* Convert back to 0-based */ 355 } 356 ierr = PetscTableDestroy(&table);CHKERRQ(ierr); 357 ierr = 
PetscSortIntWithArray(sf->nranks,ranks,rcount);CHKERRQ(ierr); 358 sf->roffset[0] = 0; 359 for (i=0; i<sf->nranks; i++) { 360 ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr); 361 sf->roffset[i+1] = sf->roffset[i] + rcount[i]; 362 rcount[i] = 0; 363 } 364 for (i=0; i<nleaves; i++) { 365 PetscInt lo,hi,irank; 366 /* Search for index of iremote[i].rank in sf->ranks */ 367 lo = 0; hi = sf->nranks; 368 while (hi - lo > 1) { 369 PetscInt mid = lo + (hi - lo)/2; 370 if (iremote[i].rank < sf->ranks[mid]) hi = mid; 371 else lo = mid; 372 } 373 if (hi - lo == 1 && iremote[i].rank == sf->ranks[lo]) irank = lo; 374 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",iremote[i].rank); 375 sf->rmine[sf->roffset[irank] + rcount[irank]] = ilocal ? ilocal[i] : i; 376 sf->rremote[sf->roffset[irank] + rcount[irank]] = iremote[i].index; 377 rcount[irank]++; 378 } 379 ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr); 380 #if !defined(PETSC_USE_64BIT_INDICES) 381 if (nroots == PETSC_DETERMINE) { 382 /* Jed, if you have a better way to do this, put it in */ 383 PetscInt *numRankLeaves, *leafOff, *leafIndices, *numRankRoots, *rootOff, *rootIndices, maxRoots = 0; 384 385 /* All to all to determine number of leaf indices from each (you can do this using Scan and asynch messages) */ 386 ierr = PetscMalloc4(size,&numRankLeaves,size+1,&leafOff,size,&numRankRoots,size+1,&rootOff);CHKERRQ(ierr); 387 ierr = PetscMemzero(numRankLeaves, size * sizeof(PetscInt));CHKERRQ(ierr); 388 for (i = 0; i < nleaves; ++i) ++numRankLeaves[iremote[i].rank]; 389 ierr = MPI_Alltoall(numRankLeaves, 1, MPIU_INT, numRankRoots, 1, MPIU_INT, PetscObjectComm((PetscObject)sf));CHKERRQ(ierr); 390 /* Could set nroots to this maximum */ 391 for (i = 0; i < size; ++i) maxRoots += numRankRoots[i]; 392 393 /* Gather all indices */ 394 ierr = PetscMalloc2(nleaves,&leafIndices,maxRoots,&rootIndices);CHKERRQ(ierr); 395 leafOff[0] = 0; 396 for (i = 0; i < size; ++i) leafOff[i+1] = leafOff[i] + 
numRankLeaves[i]; 397 for (i = 0; i < nleaves; ++i) leafIndices[leafOff[iremote[i].rank]++] = iremote[i].index; 398 leafOff[0] = 0; 399 for (i = 0; i < size; ++i) leafOff[i+1] = leafOff[i] + numRankLeaves[i]; 400 rootOff[0] = 0; 401 for (i = 0; i < size; ++i) rootOff[i+1] = rootOff[i] + numRankRoots[i]; 402 ierr = MPI_Alltoallv(leafIndices, numRankLeaves, leafOff, MPIU_INT, rootIndices, numRankRoots, rootOff, MPIU_INT, PetscObjectComm((PetscObject)sf));CHKERRQ(ierr); 403 /* Sort and reduce */ 404 ierr = PetscSortRemoveDupsInt(&maxRoots, rootIndices);CHKERRQ(ierr); 405 ierr = PetscFree2(leafIndices,rootIndices);CHKERRQ(ierr); 406 ierr = PetscFree4(numRankLeaves,leafOff,numRankRoots,rootOff);CHKERRQ(ierr); 407 sf->nroots = maxRoots; 408 } 409 #endif 410 411 sf->graphset = PETSC_TRUE; 412 ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 413 PetscFunctionReturn(0); 414 } 415 416 #undef __FUNCT__ 417 #define __FUNCT__ "PetscSFCreateInverseSF" 418 /*@C 419 PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map 420 421 Collective 422 423 Input Arguments: 424 . sf - star forest to invert 425 426 Output Arguments: 427 . isf - inverse of sf 428 429 Level: advanced 430 431 Notes: 432 All roots must have degree 1. 433 434 The local space may be a permutation, but cannot be sparse. 435 436 .seealso: PetscSFSetGraph() 437 @*/ 438 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf) 439 { 440 PetscErrorCode ierr; 441 PetscMPIInt rank; 442 PetscInt i,nroots,nleaves,maxlocal,count,*newilocal; 443 const PetscInt *ilocal; 444 PetscSFNode *roots,*leaves; 445 446 PetscFunctionBegin; 447 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 448 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 449 for (i=0,maxlocal=0; i<nleaves; i++) maxlocal = PetscMax(maxlocal,(ilocal ? 
ilocal[i] : i)+1); 450 ierr = PetscMalloc2(nroots,&roots,nleaves,&leaves);CHKERRQ(ierr); 451 for (i=0; i<nleaves; i++) { 452 leaves[i].rank = rank; 453 leaves[i].index = i; 454 } 455 for (i=0; i <nroots; i++) { 456 roots[i].rank = -1; 457 roots[i].index = -1; 458 } 459 ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr); 460 ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr); 461 462 /* Check whether our leaves are sparse */ 463 for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++; 464 if (count == nroots) newilocal = NULL; 465 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ 466 ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr); 467 for (i=0,count=0; i<nroots; i++) { 468 if (roots[i].rank >= 0) { 469 newilocal[count] = i; 470 roots[count].rank = roots[i].rank; 471 roots[count].index = roots[i].index; 472 count++; 473 } 474 } 475 } 476 477 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr); 478 ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr); 479 ierr = PetscFree2(roots,leaves);CHKERRQ(ierr); 480 PetscFunctionReturn(0); 481 } 482 483 #undef __FUNCT__ 484 #define __FUNCT__ "PetscSFDuplicate" 485 /*@ 486 PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph 487 488 Collective 489 490 Input Arguments: 491 + sf - communication object to duplicate 492 - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption) 493 494 Output Arguments: 495 . 
newsf - new communication object

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  /* NOTE(review): if sf has no type set yet, type_name is NULL here and
     PetscSFSetType() will reject it -- presumably callers always duplicate
     a typed SF; verify against callers. */
  ierr = PetscSFSetType(*newsf,((PetscObject)sf)->type_name);CHKERRQ(ierr);
  /* Implementation-specific duplication (e.g. preserving rank connectivity) */
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscInt          nroots,nleaves;
    const PetscInt    *ilocal;
    const PetscSFNode *iremote;
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscSFGetGraph"
/*@C
   PetscSFGetGraph - Get the graph specifying a parallel star forest

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.
ilocal - locations of leaves in leafdata buffers 533 - iremote - remote locations of root vertices for each leaf on the current process 534 535 Level: intermediate 536 537 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph() 538 @*/ 539 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 540 { 541 542 PetscFunctionBegin; 543 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 544 /* We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set */ 545 /* if (!sf->graphset) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Graph has not been set, must call PetscSFSetGraph()"); */ 546 if (nroots) *nroots = sf->nroots; 547 if (nleaves) *nleaves = sf->nleaves; 548 if (ilocal) *ilocal = sf->mine; 549 if (iremote) *iremote = sf->remote; 550 PetscFunctionReturn(0); 551 } 552 553 #undef __FUNCT__ 554 #define __FUNCT__ "PetscSFGetLeafRange" 555 /*@C 556 PetscSFGetLeafRange - Get the active leaf ranges 557 558 Not Collective 559 560 Input Arguments: 561 . 
sf - star forest

   Output Arguments:
+  minleaf - minimum active leaf on this process
-  maxleaf - maximum active leaf on this process

   Level: developer

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
{

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Both outputs are optional */
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscSFView"
/*@C
   PetscSFView - view a star forest

   Collective

   Input Arguments:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Default viewer: stdout on the SF's communicator */
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
    /* Synchronized printing keeps per-rank output ordered by rank */
    ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
    /* One line per leaf: local index <- (remote rank, remote root index) */
    for (i=0; i<sf->nleaves; i++) {
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* Detailed view: edges grouped by remote rank (CSR layout in roffset/rmine/rremote) */
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
      for (i=0; i<sf->nranks; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
        for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
        }
      }
    }
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscSFGetRanks"
/*@C
   PetscSFGetRanks - Get ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
.  roffset - offset in rmine/rremote for each rank (length nranks+1)
.
rmine - concatenated array holding local indices referencing each remote rank
-  rremote - concatenated array holding remote indices referenced for each remote rank

   Level: developer

.seealso: PetscSFSetGraph()
@*/
PetscErrorCode PetscSFGetRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
{

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* All outputs are optional borrowed references into sf's internal arrays */
  if (nranks)  *nranks  = sf->nranks;
  if (ranks)   *ranks   = sf->ranks;
  if (roffset) *roffset = sf->roffset;
  if (rmine)   *rmine   = sf->rmine;
  if (rremote) *rremote = sf->rremote;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscSFGetGroups"
/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Argument:
.  sf - star forest

   Output Arguments:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group;

  PetscFunctionBegin;
  /* Groups are built lazily on first request and cached on the SF */
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks: build an auxiliary SF with one leaf
       per referenced rank, all pointing at our single root (index 0) */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);

    /* Enumerate the incoming ranks: each process gathers the ranks of everyone
       whose leaves reference it */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  /* Outgoing group is simply the (already sorted) referenced ranks */
  if (sf->outgroup == MPI_GROUP_NULL) {
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscSFGetMultiSF"
/*@C
   PetscSFGetMultiSF - gets the inner SF implemeting gathers and scatters

   Collective

   Input Argument:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Arguments:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.

.seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    *multi = sf->multi;
    PetscFunctionReturn(0);
  }
  /* Built lazily on first request and cached on the SF */
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    for (i=0,maxlocal=0; i<sf->nleaves; i++) maxlocal = PetscMax(maxlocal,(sf->mine ? sf->mine[i] : i)+1);
    ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    /* inoffset = exclusive prefix sum of indegree: each root's slice of the split root space */
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Atomically assign each leaf a distinct slot within its root's slice */
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPIU_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
#if 0
#if defined(PETSC_USE_DEBUG) /* Check that the expected number of increments occurred */
    for (i=0; i<sf->nroots; i++) {
      if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
    }
#endif
#endif
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    /* Same remote ranks as sf, but each leaf targets its private slot */
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) { /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      /* Each root learns the owning rank of every leaf pointing at it */
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell each leaf its slot under the rank-sorted numbering */
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[i];
      }
      /* Replace the multi-SF's graph with the rank-ordered one */
      ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,NULL,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
    }
    ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscSFCreateEmbeddedSF"
/*@C
   PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices

   Collective

   Input Arguments:
+  sf - original star forest
.  nroots - number of roots to select on this process
-  selected - selected roots on this process

   Output Arguments:
.  newsf - new star forest

   Level: advanced

   Note:
   To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
   be done by calling PetscSFGetGraph().

.seealso: PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nroots,const PetscInt *selected,PetscSF *newsf)
{
  PetscInt       *rootdata, *leafdata, *ilocal;
  PetscSFNode    *iremote;
  PetscInt       leafsize = 0, nleaves = 0, n, i;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nroots) PetscValidPointer(selected,3);
  PetscValidPointer(newsf,4);
  /* Size of the leafdata buffer: one past the largest local leaf index */
  if (sf->mine) for (i = 0; i < sf->nleaves; ++i) {leafsize = PetscMax(leafsize, sf->mine[i]+1);}
  else leafsize = sf->nleaves;
  ierr = PetscCalloc2(sf->nroots,&rootdata,leafsize,&leafdata);CHKERRQ(ierr);
  /* Mark the selected roots, then broadcast the marks to the leaves */
  for (i=0; i<nroots; ++i) rootdata[selected[i]] = 1;
  ierr = PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);

  /* Keep only the leaves whose root survived the selection */
  for (i = 0; i < leafsize; ++i) nleaves += leafdata[i];
  ierr = PetscMalloc1(nleaves,&ilocal);CHKERRQ(ierr);
  ierr = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr);
  for (i = 0, n = 0; i < sf->nleaves; ++i) {
    const PetscInt lidx = sf->mine ? sf->mine[i] : i;

    if (leafdata[lidx]) {
      ilocal[n]        = lidx;
      iremote[n].rank  = sf->remote[i].rank;
      iremote[n].index = sf->remote[i].index;
      ++n;
    }
  }
  if (n != nleaves) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_PLIB, "There is a size mismatch in the SF embedding, %d != %d", n, nleaves);
  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,newsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*newsf,sf->nroots,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscSFBcastBegin"
/*@C
   PetscSFBcastBegin - begin pointwise broadcast to be concluded with call to PetscSFBcastEnd()

   Collective on PetscSF

   Input Arguments:
+  sf - star forest on which to communicate
.  unit - data type associated with each node
-  rootdata - buffer to broadcast

   Output Arguments:
.  leafdata - buffer to update with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFView(), PetscSFBcastEnd(), PetscSFReduceBegin()
@*/
PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);  /* no-op if already set up */
  ierr = (*sf->ops->BcastBegin)(sf,unit,rootdata,leafdata);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscSFBcastEnd"
/*@C
   PetscSFBcastEnd - end a broadcast operation started with PetscSFBcastBegin()

   Collective

   Input Arguments:
+  sf - star forest
.
unit - data type 938 - rootdata - buffer to broadcast 939 940 Output Arguments: 941 . leafdata - buffer to update with values from each leaf's respective root 942 943 Level: intermediate 944 945 .seealso: PetscSFSetGraph(), PetscSFReduceEnd() 946 @*/ 947 PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata) 948 { 949 PetscErrorCode ierr; 950 951 PetscFunctionBegin; 952 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 953 PetscSFCheckGraphSet(sf,1); 954 ierr = PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr); 955 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 956 ierr = (*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata);CHKERRQ(ierr); 957 ierr = PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr); 958 PetscFunctionReturn(0); 959 } 960 961 #undef __FUNCT__ 962 #define __FUNCT__ "PetscSFReduceBegin" 963 /*@C 964 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 965 966 Collective 967 968 Input Arguments: 969 + sf - star forest 970 . unit - data type 971 . leafdata - values to reduce 972 - op - reduction operation 973 974 Output Arguments: 975 . 
rootdata - result of reduction of values from all leaves of each root 976 977 Level: intermediate 978 979 .seealso: PetscSFBcastBegin() 980 @*/ 981 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 982 { 983 PetscErrorCode ierr; 984 985 PetscFunctionBegin; 986 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 987 PetscSFCheckGraphSet(sf,1); 988 ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 989 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 990 ierr = (sf->ops->ReduceBegin)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr); 991 ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 992 PetscFunctionReturn(0); 993 } 994 995 #undef __FUNCT__ 996 #define __FUNCT__ "PetscSFReduceEnd" 997 /*@C 998 PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin() 999 1000 Collective 1001 1002 Input Arguments: 1003 + sf - star forest 1004 . unit - data type 1005 . leafdata - values to reduce 1006 - op - reduction operation 1007 1008 Output Arguments: 1009 . 
rootdata - result of reduction of values from all leaves of each root 1010 1011 Level: intermediate 1012 1013 .seealso: PetscSFSetGraph(), PetscSFBcastEnd() 1014 @*/ 1015 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1016 { 1017 PetscErrorCode ierr; 1018 1019 PetscFunctionBegin; 1020 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1021 PetscSFCheckGraphSet(sf,1); 1022 ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr); 1023 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1024 ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr); 1025 ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr); 1026 PetscFunctionReturn(0); 1027 } 1028 1029 #undef __FUNCT__ 1030 #define __FUNCT__ "PetscSFComputeDegreeBegin" 1031 /*@C 1032 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd() 1033 1034 Collective 1035 1036 Input Arguments: 1037 . sf - star forest 1038 1039 Output Arguments: 1040 . degree - degree of each root vertex 1041 1042 Level: advanced 1043 1044 .seealso: PetscSFGatherBegin() 1045 @*/ 1046 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree) 1047 { 1048 PetscErrorCode ierr; 1049 1050 PetscFunctionBegin; 1051 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1052 PetscSFCheckGraphSet(sf,1); 1053 PetscValidPointer(degree,2); 1054 if (!sf->degreeknown) { 1055 PetscInt i,maxlocal; 1056 if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 1057 for (i=0,maxlocal=0; i<sf->nleaves; i++) maxlocal = PetscMax(maxlocal,(sf->mine ? 
sf->mine[i] : i)+1); 1058 ierr = PetscMalloc1(sf->nroots,&sf->degree);CHKERRQ(ierr); 1059 ierr = PetscMalloc1(maxlocal,&sf->degreetmp);CHKERRQ(ierr); 1060 for (i=0; i<sf->nroots; i++) sf->degree[i] = 0; 1061 for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1; 1062 ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp,sf->degree,MPIU_SUM);CHKERRQ(ierr); 1063 } 1064 *degree = NULL; 1065 PetscFunctionReturn(0); 1066 } 1067 1068 #undef __FUNCT__ 1069 #define __FUNCT__ "PetscSFComputeDegreeEnd" 1070 /*@C 1071 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin() 1072 1073 Collective 1074 1075 Input Arguments: 1076 . sf - star forest 1077 1078 Output Arguments: 1079 . degree - degree of each root vertex 1080 1081 Level: developer 1082 1083 .seealso: 1084 @*/ 1085 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree) 1086 { 1087 PetscErrorCode ierr; 1088 1089 PetscFunctionBegin; 1090 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1091 PetscSFCheckGraphSet(sf,1); 1092 if (!sf->degreeknown) { 1093 ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp,sf->degree,MPIU_SUM);CHKERRQ(ierr); 1094 ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr); 1095 1096 sf->degreeknown = PETSC_TRUE; 1097 } 1098 *degree = sf->degree; 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "PetscSFFetchAndOpBegin" 1104 /*@C 1105 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd() 1106 1107 Collective 1108 1109 Input Arguments: 1110 + sf - star forest 1111 . unit - data type 1112 . 
leafdata - leaf values to use in reduction 1113 - op - operation to use for reduction 1114 1115 Output Arguments: 1116 + rootdata - root values to be updated, input state is seen by first process to perform an update 1117 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1118 1119 Level: advanced 1120 1121 Note: 1122 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1123 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1124 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1125 integers. 1126 1127 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph() 1128 @*/ 1129 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1130 { 1131 PetscErrorCode ierr; 1132 1133 PetscFunctionBegin; 1134 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1135 PetscSFCheckGraphSet(sf,1); 1136 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1137 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1138 ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr); 1139 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1140 PetscFunctionReturn(0); 1141 } 1142 1143 #undef __FUNCT__ 1144 #define __FUNCT__ "PetscSFFetchAndOpEnd" 1145 /*@C 1146 PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value 1147 1148 Collective 1149 1150 Input Arguments: 1151 + sf - star forest 1152 . unit - data type 1153 . 
leafdata - leaf values to use in reduction 1154 - op - operation to use for reduction 1155 1156 Output Arguments: 1157 + rootdata - root values to be updated, input state is seen by first process to perform an update 1158 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1159 1160 Level: advanced 1161 1162 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph() 1163 @*/ 1164 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1165 { 1166 PetscErrorCode ierr; 1167 1168 PetscFunctionBegin; 1169 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1170 PetscSFCheckGraphSet(sf,1); 1171 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1172 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1173 ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr); 1174 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1175 PetscFunctionReturn(0); 1176 } 1177 1178 #undef __FUNCT__ 1179 #define __FUNCT__ "PetscSFGatherBegin" 1180 /*@C 1181 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 1182 1183 Collective 1184 1185 Input Arguments: 1186 + sf - star forest 1187 . unit - data type 1188 - leafdata - leaf data to gather to roots 1189 1190 Output Argument: 1191 . 
multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1192 1193 Level: intermediate 1194 1195 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1196 @*/ 1197 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1198 { 1199 PetscErrorCode ierr; 1200 PetscSF multi; 1201 1202 PetscFunctionBegin; 1203 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1204 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1205 ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 1206 PetscFunctionReturn(0); 1207 } 1208 1209 #undef __FUNCT__ 1210 #define __FUNCT__ "PetscSFGatherEnd" 1211 /*@C 1212 PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin() 1213 1214 Collective 1215 1216 Input Arguments: 1217 + sf - star forest 1218 . unit - data type 1219 - leafdata - leaf data to gather to roots 1220 1221 Output Argument: 1222 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1223 1224 Level: intermediate 1225 1226 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1227 @*/ 1228 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1229 { 1230 PetscErrorCode ierr; 1231 PetscSF multi; 1232 1233 PetscFunctionBegin; 1234 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1235 PetscSFCheckGraphSet(sf,1); 1236 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1237 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1238 ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 1239 PetscFunctionReturn(0); 1240 } 1241 1242 #undef __FUNCT__ 1243 #define __FUNCT__ "PetscSFScatterBegin" 1244 /*@C 1245 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd() 1246 1247 Collective 1248 1249 Input Arguments: 1250 + sf - star forest 1251 . 
unit - data type 1252 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1253 1254 Output Argument: 1255 . leafdata - leaf data to be update with personal data from each respective root 1256 1257 Level: intermediate 1258 1259 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1260 @*/ 1261 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1262 { 1263 PetscErrorCode ierr; 1264 PetscSF multi; 1265 1266 PetscFunctionBegin; 1267 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1268 PetscSFCheckGraphSet(sf,1); 1269 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1270 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1271 ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr); 1272 PetscFunctionReturn(0); 1273 } 1274 1275 #undef __FUNCT__ 1276 #define __FUNCT__ "PetscSFScatterEnd" 1277 /*@C 1278 PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin() 1279 1280 Collective 1281 1282 Input Arguments: 1283 + sf - star forest 1284 . unit - data type 1285 - multirootdata - root buffer to send to each leaf, one unit of data per leaf 1286 1287 Output Argument: 1288 . leafdata - leaf data to be update with personal data from each respective root 1289 1290 Level: intermediate 1291 1292 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1293 @*/ 1294 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1295 { 1296 PetscErrorCode ierr; 1297 PetscSF multi; 1298 1299 PetscFunctionBegin; 1300 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1301 PetscSFCheckGraphSet(sf,1); 1302 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1303 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1304 ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr); 1305 PetscFunctionReturn(0); 1306 } 1307