#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/hashseti.h>
#include <petscctable.h>

/* Debug-only guard: fail fast if an SF is used before its graph has been set.
   Compiles to a no-op in optimized builds. */
#if defined(PETSC_USE_DEBUG)
#  define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                              \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif

const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0};

/*@
   PetscSFCreate - create a star forest communication context

   Collective

   Input Arguments:
.  comm - communicator on which the star forest will operate

   Output Arguments:
.  sf - new star forest context

   Options Database Keys:
+  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
.  -sf_type window    -Use MPI-3 one-sided window for communication
-  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication

   Level: intermediate

   Notes:
   When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
   MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
   SFs are optimized and they have better performance than general SFs.

.seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
{
  PetscErrorCode ierr;
  PetscSF        b;

  PetscFunctionBegin;
  PetscValidPointer(sf,2);
  ierr = PetscSFInitializePackage();CHKERRQ(ierr);

  ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);

  /* Sentinels: nroots/nleaves/nranks == -1 mean "graph not set yet"; the leaf
     range starts as an empty (inverted) interval so the first min/max update wins. */
  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_MAX_INT;
  b->maxleaf   = PETSC_MIN_INT;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;

  *sf = b;
  PetscFunctionReturn(0);
}

/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Arguments:
.  sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the type-specific implementation tear down its private data first */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  /* Return graph-describing fields to their PetscSFCreate() sentinel values */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  {
    PetscInt i;
    /* Release device-side mirrors of rmine, if they were ever allocated */
    for (i=0; i<2; i++) {if (sf->rmine_d[i]) {cudaError_t err = cudaFree(sf->rmine_d[i]);CHKERRCUDA(err);sf->rmine_d[i]=NULL;}}
  }
#endif
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFSetType - Set the PetscSF communication implementation

   Collective on PetscSF

   Input Parameters:
+  sf - the PetscSF context
-  type - a known method

   Options Database Key:
.  -sf_type <type> - Sets the method; use -help for a list
   of available methods (for instance, window, basic, neighbor)

   Notes:
   See "include/petscsf.h" for available methods (for instance)
+    PETSCSFWINDOW - MPI-2/3 one-sided
-    PETSCSFBASIC - basic implementation using MPI-1 two-sided

  Level: intermediate

.seealso: PetscSFType, PetscSFCreate()
@*/
PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
{
  PetscErrorCode ierr,(*r)(PetscSF);
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidCharPointer(type,2);

  ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
  if (match) PetscFunctionReturn(0); /* already of the requested type */

  ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
  if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
  /* Destroy the previous PetscSF implementation context */
  if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
  ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
  ierr = (*r)(sf);CHKERRQ(ierr); /* run the new type's constructor */
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetType - Get the PetscSF communication implementation

   Not Collective

   Input Parameter:
.  sf - the PetscSF context

   Output Parameter:
. 
 type - the PetscSF type name

   Level: intermediate

.seealso: PetscSFSetType(), PetscSFCreate()
@*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
  PetscValidPointer(type,2);
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}

/*@
   PetscSFDestroy - destroy star forest

   Collective

   Input Arguments:
.  sf - address of star forest

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFReset()
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Only the last reference actually tears the object down */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Debug-only sanity check run from PetscSFSetUp(): every leaf must reference a
   valid (rank,index) pair and every explicit leaf location must be >= 0.
   A no-op in optimized builds. */
static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
{
#if defined(PETSC_USE_DEBUG)
  PetscInt          i, nleaves;
  PetscMPIInt       size;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  if (!sf->graphset) PetscFunctionReturn(0);
  ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  for (i = 0; i < nleaves; i++) {
    const PetscInt rank   = iremote[i].rank;
    const PetscInt remote = iremote[i].index;
    const PetscInt leaf   = ilocal ? ilocal[i] : i; /* NULL ilocal means contiguous leaves */
    if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
    if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
    if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
  }
  PetscFunctionReturn(0);
#else
  PetscFunctionBegin;
  PetscFunctionReturn(0);
#endif
}

/*@
   PetscSFSetUp - set up communication structures

   Collective

   Input Arguments:
.  sf - star forest communication object

   Level: beginner

.seealso: PetscSFSetFromOptions(), PetscSFSetType()
@*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (sf->setupcalled) PetscFunctionReturn(0); /* setup is idempotent */
  ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr);
  sf->use_gpu_aware_mpi = use_gpu_aware_mpi; /* snapshot the global setting at setup time */
  if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} /* default implementation */
  ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
  ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetFromOptions - set PetscSF options using the options database

   Logically Collective

   Input Arguments:
.  sf - star forest

   Options Database Keys:
+  -sf_type       - implementation type, see PetscSFSetType()
.  -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
. 
 -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also
                          use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
                          If true, this option only works with -use_cuda_aware_mpi 1.
-  -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
                              If true, this option only works with -use_cuda_aware_mpi 1.

   Level: intermediate
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
  /* Default to the current type if one is already set, otherwise to basic */
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);

#if defined(PETSC_HAVE_CUDA)
  sf->use_default_stream = PETSC_TRUE; /* The assumption is true for PETSc internal use of SF */
  ierr = PetscOptionsBool("-sf_use_default_stream","Assume SF's input and output root/leafdata is computed on the default stream","PetscSFSetFromOptions",sf->use_default_stream,&sf->use_default_stream,NULL);CHKERRQ(ierr);
  sf->use_stream_aware_mpi = PETSC_FALSE;
  ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr);
#endif
  if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

   Logically Collective

   Input Arguments:
+  sf - star forest
-  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

   Level: advanced

.seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  /* The multi-SF caches the ordering; once it exists the choice is frozen */
  if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Arguments:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
during setup in debug mode)
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
during setup in debug mode)
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
   In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode

   Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
   encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
   needed

   Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
   indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).

.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
  if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    ierr = PetscSFReset(sf);CHKERRQ(ierr);
  }

  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  if (nleaves && ilocal) {
    /* Compute the leaf range and detect the identity permutation in one pass */
    PetscInt i;
    PetscInt minleaf = PETSC_MAX_INT;
    PetscInt maxleaf = PETSC_MIN_INT;
    int      contiguous = 1;
    for (i=0; i<nleaves; i++) {
      minleaf = PetscMin(minleaf,ilocal[i]);
      maxleaf = PetscMax(maxleaf,ilocal[i]);
      contiguous &= (ilocal[i] == i);
    }
    sf->minleaf = minleaf;
    sf->maxleaf = maxleaf;
    if (contiguous) {
      /* Identity leaf numbering is canonically encoded as ilocal == NULL;
         if we were given ownership of the array it is no longer needed */
      if (localmode == PETSC_OWN_POINTER) {
        ierr = PetscFree(ilocal);CHKERRQ(ierr);
      }
      ilocal = NULL;
    }
  } else {
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1; /* -1 (empty range) when nleaves == 0 */
  }

  if (ilocal) {
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
      sf->mine = sf->mine_alloc;
      break;
    case PETSC_OWN_POINTER:
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      sf->mine_alloc = NULL; /* caller retains ownership; nothing to free later */
      sf->mine       = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }

  /* Store the remote (root) side of the graph, honoring the requested copy mode */
  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    sf->remote_alloc = NULL; /* caller retains ownership; nothing to free later */
    sf->remote       = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

   Collective

   Input Parameters:
+  sf - The PetscSF
.  map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
-  pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

   Notes:
   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
   n and N are local and global sizes of x respectively.

   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
   sequential vectors y on all ranks.

   With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
   sequential vector y on rank 0.

   In above cases, entries of x are roots and entries of y are leaves.

   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
   creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
   of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
   not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data
   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

   In this case, roots and leaves are symmetric.

   Level: intermediate
@*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* map is ignored for ALLTOALL; build a layout with 'size' roots per rank */
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots  = n;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    sf->nleaves = rank ? 0 : N; /* only rank 0 has leaves */
    sf->nroots  = n;
    sf->nranks  = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves = size;
    sf->nroots  = size;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

   Collective

   Input Arguments:

.  sf - star forest to invert

   Output Arguments:
.  isf - inverse of sf
   Level: advanced

   Notes:
   All roots must have degree 1.

   The local space may be a permutation, but cannot be sparse.

.seealso: PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  /* Each leaf slot records its own (rank, local index) ... */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* ... and roots start as "unreferenced" sentinels */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  /* Push each leaf's identity to its root; roots never written keep rank == -1 */
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL; /* dense: contiguous leaf numbering in the inverse */
  else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Arguments:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Arguments:
.  newsf - new communication object

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      /* Patterned graphs are reproduced from their pattern, not their arrays */
      ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
  /* Give the type-specific implementation a chance to copy its private data */
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGraph - Get the graph specifying a parallel star forest

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers
-  iremote - remote locations of root vertices for each leaf on the current process

   Notes:
   We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet

   When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you
   want to update the graph, you must call PetscSFSetGraph after modifying the iremote array.

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->GetGraph) {
    ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
  } else {
    /* Generic implementation: hand out the cached arrays; any output may be NULL */
    if (nroots)  *nroots  = sf->nroots;
    if (nleaves) *nleaves = sf->nleaves;
    if (ilocal)  *ilocal  = sf->mine;
    if (iremote) *iremote = sf->remote;
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetLeafRange - Get the active leaf ranges

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  minleaf - minimum active leaf on this process.
Return 0 if there are no leaves.
-  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.

   Level: developer

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
{

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFViewFromOptions - View from Options

   Collective on PetscSF

   Input Parameters:
+  A - the star forest
.  obj - Optional object
-  name - command line option

   Level: intermediate
.seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
  ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFView - view a star forest

   Collective

   Input Arguments:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);} /* ranks/roffset are only available after setup */
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        /* Print per-rank edge lists in sorted rank order so output is reproducible */
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
.  roffset - offset in rmine/rremote for each rank (length nranks+1)
.  rmine - concatenated array holding local indices referencing each remote rank
-  rremote - concatenated array holding remote indices referenced for each remote rank

   Level: developer

.seealso: PetscSFGetLeafRanks()
@*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetRootRanks) {
    ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
  } else {
    /* The generic implementation */
    if (nranks)  *nranks  = sf->nranks;
    if (ranks)   *ranks   = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine)   *rmine   = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  niranks - number of leaf ranks referencing roots on this process
.  iranks - array of ranks
.  ioffset - offset in irootloc for each rank (length niranks+1)
-  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetLeafRanks) {
    ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
  } else {
    /* No generic fallback: leaf-rank data only exists for types that build it */
    PetscSFType type;
    ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
  }
  PetscFunctionReturn(0);
}

/* Linear membership test: PETSC_TRUE iff needle occurs in list[0..n). */
static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
  PetscInt i;
  for (i=0; i<n; i++) {
    if (needle == list[i]) return PETSC_TRUE;
  }
  return PETSC_FALSE;
}

/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

   Collective

   Input Arguments:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  /* Count leaves per remote rank with a hash table keyed by rank */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank (shifted by one, presumably because 0 is not a usable table key -- confirm with PetscTable docs) */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  /* Drain the table into parallel arrays: ranks[] (remote rank) and rcount[] (leaves per rank) */
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--; /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group   group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate each member of dgroup into its rank in sf's communicator */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished.
     Two-pointer sweep: i scans backward for a distinguished rank, sf->ndranks scans forward
     for a non-distinguished one; when both stop, the pair is swapped. */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i; ) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for ( ; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition by rank, keeping rcount[] synchronized */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* Prefix-sum the per-rank counts into roffset[]; rcount[] is reused below as a write cursor */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;
  }
  /* Bucket each leaf into its rank's segment of rmine[]/rremote[] */
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit: reuse irank while consecutive leaves reference the same remote rank */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks (distinguished segment first) */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Argument:
.  sf - star forest

   Output Arguments:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) {  /* Groups are computed lazily and cached on the SF */
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks: build an auxiliary SF with one leaf per
       referenced rank, all pointing at root 0 of that rank; its root degree is then
       the number of processes whose leaves reference our roots */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
    /* Enumerate the incoming ranks: each process gathers the rank ids of its referencers */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* Outgoing group is simply the ranks we reference, already stored in sf->ranks */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Argument:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Arguments:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.
1087 1088 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering() 1089 @*/ 1090 PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi) 1091 { 1092 PetscErrorCode ierr; 1093 1094 PetscFunctionBegin; 1095 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1096 PetscValidPointer(multi,2); 1097 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */ 1098 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1099 *multi = sf->multi; 1100 PetscFunctionReturn(0); 1101 } 1102 if (!sf->multi) { 1103 const PetscInt *indegree; 1104 PetscInt i,*inoffset,*outones,*outoffset,maxlocal; 1105 PetscSFNode *remote; 1106 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 1107 ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr); 1108 ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr); 1109 ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr); 1110 inoffset[0] = 0; 1111 for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i]; 1112 for (i=0; i<maxlocal; i++) outones[i] = 1; 1113 ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1114 ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1115 for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 1116 #if defined(PETSC_USE_DEBUG) /* Check that the expected number of increments occurred */ 1117 for (i=0; i<sf->nroots; i++) { 1118 if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp"); 1119 } 1120 #endif 1121 ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr); 1122 for (i=0; i<sf->nleaves; i++) { 1123 remote[i].rank = sf->remote[i].rank; 1124 remote[i].index = outoffset[sf->mine ? 
sf->mine[i] : i]; 1125 } 1126 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 1127 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1128 if (sf->rankorder) { /* Sort the ranks */ 1129 PetscMPIInt rank; 1130 PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree; 1131 PetscSFNode *newremote; 1132 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 1133 for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]); 1134 ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr); 1135 for (i=0; i<maxlocal; i++) outranks[i] = rank; 1136 ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr); 1137 ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr); 1138 /* Sort the incoming ranks at each vertex, build the inverse map */ 1139 for (i=0; i<sf->nroots; i++) { 1140 PetscInt j; 1141 for (j=0; j<indegree[i]; j++) tmpoffset[j] = j; 1142 ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr); 1143 for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 1144 } 1145 ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr); 1146 ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr); 1147 ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr); 1148 for (i=0; i<sf->nleaves; i++) { 1149 newremote[i].rank = sf->remote[i].rank; 1150 newremote[i].index = newoutoffset[sf->mine ? 
sf->mine[i] : i]; 1151 } 1152 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1153 ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr); 1154 } 1155 ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr); 1156 } 1157 *multi = sf->multi; 1158 PetscFunctionReturn(0); 1159 } 1160 1161 /*@C 1162 PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices 1163 1164 Collective 1165 1166 Input Arguments: 1167 + sf - original star forest 1168 . nselected - number of selected roots on this process 1169 - selected - indices of the selected roots on this process 1170 1171 Output Arguments: 1172 . esf - new star forest 1173 1174 Level: advanced 1175 1176 Note: 1177 To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can 1178 be done by calling PetscSFGetGraph(). 1179 1180 .seealso: PetscSFSetGraph(), PetscSFGetGraph() 1181 @*/ 1182 PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf) 1183 { 1184 PetscInt i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal; 1185 const PetscInt *ilocal; 1186 signed char *rootdata,*leafdata,*leafmem; 1187 const PetscSFNode *iremote; 1188 PetscSFNode *new_iremote; 1189 MPI_Comm comm; 1190 PetscErrorCode ierr; 1191 1192 PetscFunctionBegin; 1193 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1194 PetscSFCheckGraphSet(sf,1); 1195 if (nselected) PetscValidPointer(selected,3); 1196 PetscValidPointer(esf,4); 1197 1198 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1199 1200 ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1201 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1202 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 1203 1204 #if defined(PETSC_USE_DEBUG) 1205 /* Error out if selected[] has dups or out 
of range indices */ 1206 { 1207 PetscBool dups; 1208 ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr); 1209 if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups"); 1210 for (i=0; i<nselected; i++) 1211 if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %D is out of [0,%D)",selected[i],nroots); 1212 } 1213 #endif 1214 1215 if (sf->ops->CreateEmbeddedSF) { 1216 ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,selected,esf);CHKERRQ(ierr); 1217 } else { 1218 /* A generic version of creating embedded sf */ 1219 ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 1220 maxlocal = maxleaf - minleaf + 1; 1221 ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr); 1222 leafdata = leafmem - minleaf; 1223 /* Tag selected roots and bcast to leaves */ 1224 for (i=0; i<nselected; i++) rootdata[selected[i]] = 1; 1225 ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr); 1226 ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr); 1227 1228 /* Build esf with leaves that are still connected */ 1229 esf_nleaves = 0; 1230 for (i=0; i<nleaves; i++) { 1231 j = ilocal ? ilocal[i] : i; 1232 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1233 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1234 */ 1235 esf_nleaves += (leafdata[j] ? 1 : 0); 1236 } 1237 ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr); 1238 ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr); 1239 for (i=n=0; i<nleaves; i++) { 1240 j = ilocal ? 
ilocal[i] : i; 1241 if (leafdata[j]) { 1242 new_ilocal[n] = j; 1243 new_iremote[n].rank = iremote[i].rank; 1244 new_iremote[n].index = iremote[i].index; 1245 ++n; 1246 } 1247 } 1248 ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr); 1249 ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr); 1250 ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1251 ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr); 1252 } 1253 ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1254 PetscFunctionReturn(0); 1255 } 1256 1257 /*@C 1258 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1259 1260 Collective 1261 1262 Input Arguments: 1263 + sf - original star forest 1264 . nselected - number of selected leaves on this process 1265 - selected - indices of the selected leaves on this process 1266 1267 Output Arguments: 1268 . newsf - new star forest 1269 1270 Level: advanced 1271 1272 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph() 1273 @*/ 1274 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1275 { 1276 const PetscSFNode *iremote; 1277 PetscSFNode *new_iremote; 1278 const PetscInt *ilocal; 1279 PetscInt i,nroots,*leaves,*new_ilocal; 1280 MPI_Comm comm; 1281 PetscErrorCode ierr; 1282 1283 PetscFunctionBegin; 1284 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1285 PetscSFCheckGraphSet(sf,1); 1286 if (nselected) PetscValidPointer(selected,3); 1287 PetscValidPointer(newsf,4); 1288 1289 /* Uniq selected[] and put results in leaves[] */ 1290 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1291 ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr); 1292 ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr); 1293 ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr); 1294 if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) 
SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves); 1295 1296 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1297 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) { 1298 ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr); 1299 } else { 1300 ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr); 1301 ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr); 1302 ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr); 1303 for (i=0; i<nselected; ++i) { 1304 const PetscInt l = leaves[i]; 1305 new_ilocal[i] = ilocal ? ilocal[l] : l; 1306 new_iremote[i].rank = iremote[l].rank; 1307 new_iremote[i].index = iremote[l].index; 1308 } 1309 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr); 1310 ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1311 } 1312 ierr = PetscFree(leaves);CHKERRQ(ierr); 1313 PetscFunctionReturn(0); 1314 } 1315 1316 /*@C 1317 PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd() 1318 1319 Collective on PetscSF 1320 1321 Input Arguments: 1322 + sf - star forest on which to communicate 1323 . unit - data type associated with each node 1324 . rootdata - buffer to broadcast 1325 - op - operation to use for reduction 1326 1327 Output Arguments: 1328 . 
leafdata - buffer to be reduced with values from each leaf's respective root 1329 1330 Level: intermediate 1331 1332 .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd() 1333 @*/ 1334 PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1335 { 1336 PetscErrorCode ierr; 1337 PetscMemType rootmtype,leafmtype; 1338 1339 PetscFunctionBegin; 1340 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1341 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1342 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1343 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1344 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1345 ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 1346 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1347 PetscFunctionReturn(0); 1348 } 1349 1350 /*@C 1351 PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin() 1352 1353 Collective 1354 1355 Input Arguments: 1356 + sf - star forest 1357 . unit - data type 1358 . rootdata - buffer to broadcast 1359 - op - operation to use for reduction 1360 1361 Output Arguments: 1362 . 
leafdata - buffer to be reduced with values from each leaf's respective root 1363 1364 Level: intermediate 1365 1366 .seealso: PetscSFSetGraph(), PetscSFReduceEnd() 1367 @*/ 1368 PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1369 { 1370 PetscErrorCode ierr; 1371 1372 PetscFunctionBegin; 1373 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1374 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1375 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1376 ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr); 1377 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1378 PetscFunctionReturn(0); 1379 } 1380 1381 /*@C 1382 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 1383 1384 Collective 1385 1386 Input Arguments: 1387 + sf - star forest 1388 . unit - data type 1389 . leafdata - values to reduce 1390 - op - reduction operation 1391 1392 Output Arguments: 1393 . 
rootdata - result of reduction of values from all leaves of each root 1394 1395 Level: intermediate 1396 1397 .seealso: PetscSFBcastBegin() 1398 @*/ 1399 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1400 { 1401 PetscErrorCode ierr; 1402 PetscMemType rootmtype,leafmtype; 1403 1404 PetscFunctionBegin; 1405 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1406 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1407 ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1408 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1409 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1410 ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr); 1411 ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1412 PetscFunctionReturn(0); 1413 } 1414 1415 /*@C 1416 PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin() 1417 1418 Collective 1419 1420 Input Arguments: 1421 + sf - star forest 1422 . unit - data type 1423 . leafdata - values to reduce 1424 - op - reduction operation 1425 1426 Output Arguments: 1427 . 
rootdata - result of reduction of values from all leaves of each root 1428 1429 Level: intermediate 1430 1431 .seealso: PetscSFSetGraph(), PetscSFBcastEnd() 1432 @*/ 1433 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1434 { 1435 PetscErrorCode ierr; 1436 1437 PetscFunctionBegin; 1438 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1439 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1440 ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr); 1441 ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr); 1442 ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr); 1443 PetscFunctionReturn(0); 1444 } 1445 1446 /*@C 1447 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd() 1448 1449 Collective 1450 1451 Input Arguments: 1452 + sf - star forest 1453 . unit - data type 1454 . leafdata - leaf values to use in reduction 1455 - op - operation to use for reduction 1456 1457 Output Arguments: 1458 + rootdata - root values to be updated, input state is seen by first process to perform an update 1459 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1460 1461 Level: advanced 1462 1463 Note: 1464 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1465 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1466 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1467 integers. 

.seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype,leafupdatemtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Detect host/device residency of all three buffers */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
  /* Implementations take a single leaf memory type, so both leaf buffers must match */
  if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
  ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Arguments:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {  /* Degrees are computed once and cached on the SF */
    PetscInt i, nroots = sf->nroots, maxlocal;
    if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf-sf->minleaf+1;
    ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* Reduce a buffer of ones leaf->root with MPI_SUM: each root accumulates its leaf count.
       degreetmp is shifted by minleaf so the reduce can index by absolute leaf index. */
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  *degree = NULL; /* result is not valid until PetscSFComputeDegreeEnd() */
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso:
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    sf->degreeknown = PETSC_TRUE; /* subsequent calls return the cached array directly */
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}


/*@C
   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
   Each multi-root is assigned index of the corresponding original root.

   Collective

   Input Arguments:
+  sf - star forest
-  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

   Output Arguments:
+  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
-  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

   Level: developer

   Notes:
   The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.

.seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
@*/
PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
{
  PetscSF        msf;
  PetscInt       i, j, k, nroots, nmroots;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
  if (nroots) PetscValidIntPointer(degree,2);
  if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
  PetscValidPointer(multiRootsOrigNumbering,4);
  ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr);
  /* Repeat each original root index degree[i] times: multi-root k maps back to original root i */
  for (i=0,j=0,k=0; i<nroots; i++) {
    if (!degree[i]) continue; /* roots with no leaves contribute no multi-roots */
    for (j=0; j<degree[i]; j++,k++) {
      (*multiRootsOrigNumbering)[k] = i;
    }
  }
#if defined(PETSC_USE_DEBUG)
  /* The degrees summed over all roots must equal the multi-SF's root count */
  if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
#endif
  if (nMultiRoots) *nMultiRoots = nmroots;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Argument:
.  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Gather is implemented as a reduce over the multi-SF, whose roots all have degree 1,
     so MPIU_REPLACE deposits each leaf value into its own slot */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Argument:
.  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

   Collective

   Input Arguments:
+  sf - star forest
.
unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        msf = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* A scatter is a broadcast over the multi-SF associated with sf */
  ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(msf,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.
leafdata - leaf data to be update with personal data from each respective root 1749 1750 Level: intermediate 1751 1752 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1753 @*/ 1754 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1755 { 1756 PetscErrorCode ierr; 1757 PetscSF multi = NULL; 1758 1759 PetscFunctionBegin; 1760 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1761 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1762 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1763 ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr); 1764 PetscFunctionReturn(0); 1765 } 1766 1767 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 1768 { 1769 #if defined(PETSC_USE_DEBUG) 1770 PetscInt i, n, nleaves; 1771 const PetscInt *ilocal = NULL; 1772 PetscHSetI seen; 1773 PetscErrorCode ierr; 1774 1775 PetscFunctionBegin; 1776 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 1777 ierr = PetscHSetICreate(&seen);CHKERRQ(ierr); 1778 for (i = 0; i < nleaves; i++) { 1779 const PetscInt leaf = ilocal ? ilocal[i] : i; 1780 ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr); 1781 } 1782 ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr); 1783 if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique"); 1784 ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr); 1785 PetscFunctionReturn(0); 1786 #else 1787 PetscFunctionBegin; 1788 PetscFunctionReturn(0); 1789 #endif 1790 } 1791 1792 /*@ 1793 PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view 1794 1795 Input Parameters: 1796 + sfA - The first PetscSF 1797 - sfB - The second PetscSF 1798 1799 Output Parameters: 1800 . sfBA - The composite SF 1801 1802 Level: developer 1803 1804 Notes: 1805 Currently, the two SFs must be defined on congruent communicators and they must be true star 1806 forests, i.e. 
the same leaf is not connected with different roots.

   sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
   Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  if (localPointsA) {
    /* sfA's leaves are not contiguous from 0: scatter remotePointsA into an array indexed by sfB's
       root numbers, padding unconnected roots with rank/index -1 (the "unconnected" sentinel below) */
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      /* leaves of sfA beyond sfB's root space cannot connect to anything in sfB */
      if (localPointsA[i] >= numRootsB) continue;
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  /* Push sfA's (rank,index) root info down sfB to its leaves; leafdataB is shifted by minleaf so it
     can be indexed by global leaf number */
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* denseB: sfB's leaves are contiguous AND every leaf received a connected root, so leafdataB can
     be handed to the new SF directly without compaction */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership of leafdataB transfers to the new SF below */
  } else {
    /* Compact: keep only leaves whose received root is connected (rank != -1) */
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  /* PETSC_OWN_POINTER: the new SF takes ownership of localPointsBA and remotePointsBA */
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

   Input Parameters:
+  sfA - The first PetscSF
-  sfB - The second PetscSF

   Output Parameters:
.  sfBA - The composite SF.

   Level: developer

   Notes:
   Currently, the two SFs must be defined on congruent communicators and they must be true star
   forests, i.e. the same leaf is not connected with different roots.
Even more, all roots of the
   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
   a Reduce on sfB, on connected roots.

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op            op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool         iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPIU_REPLACE. In that case, MPI_MAXLOC and MPIU_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr);
  if (iswin) op = MPIU_REPLACE;
#endif

  /* Lay out sfA's (rank,index) root info by sfB's leaf numbers, with rank/index -1 marking leaves of
     sfB that sfA does not reach; the array is indexed relative to minleaf */
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      /* skip sfA leaves outside sfB's leaf range: they cannot match any sfB leaf */
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* contiguous sfA leaves: leaf i has local number i */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  /* Pre-fill with the "unconnected" sentinel; roots of sfB that receive nothing keep it */
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Pull the per-leaf root info of sfA up to sfB's roots (the inverse direction of sfB);
     the leaf buffer is passed shifted by minleaf so it is indexed by global leaf number */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* Compact in place: keep only sfB roots that received a connected entry (rank != -1) */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  ierr =
PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  /* PETSC_OWN_POINTER: the new SF takes ownership of localPointsBA and remotePointsBA */
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
. out - The local PetscSF
 */
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm          comm;
  PetscMPIInt       myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt          i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode       *liremote;
  PetscSF           lsf;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Delegate to the implementation when it provides a specialized routine */
  if (sf->ops->CreateLocalSF) {
    ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
  } else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr);

    /* Find out local edges and build a local SF */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
    /* First pass: count leaves whose remote root lives on this rank */
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);

    /* Second pass: copy the intra-process edges, remapping the owner rank to 0 */
    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
    /* PETSC_OWN_POINTER: lsf takes ownership of lilocal and liremote */
    ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
    *out = lsf;
  }
  PetscFunctionReturn(0);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Determine whether the buffers are host or device memory before dispatching */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  if (sf->ops->BcastToZero) {
    ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  /* NOTE(review): LogEventEnd also uses PETSCSF_BcastAndOpBegin (not an *End event) — looks deliberate
     since the whole operation is timed as one event here, but confirm against the logging conventions */
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}