#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/hashseti.h>
#include <petscctable.h>

/* In debug builds, error out if the graph of sf has not been set; compiles away otherwise */
#if defined(PETSC_USE_DEBUG)
#  define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                              \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif

const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};

/*@
   PetscSFCreate - create a star forest communication context

   Collective

   Input Arguments:
.  comm - communicator on which the star forest will operate

   Output Arguments:
.  sf - new star forest context

   Options Database Keys:
+  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
.  -sf_type window    -Use MPI-3 one-sided window for communication
-  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication

   Level: intermediate

   Notes:
   When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
   MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
   SFs are optimized and they have better performance than general SFs.

.seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
{
  PetscErrorCode ierr;
  PetscSF        b;

  PetscFunctionBegin;
  PetscValidPointer(sf,2);
  ierr = PetscSFInitializePackage();CHKERRQ(ierr);

  ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);

  /* -1 marks counts as "graph not yet set"; leaf range starts inverted so the
     first PetscSFSetGraph() min/max scan works */
  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_MAX_INT;
  b->maxleaf   = PETSC_MIN_INT;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;

  *sf = b;
  PetscFunctionReturn(0);
}

/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Arguments:
.  sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the implementation release its own data first */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  {
    /* Free device-side copies of rmine, if any were created */
    PetscInt i;
    for (i=0; i<2; i++) {if (sf->rmine_d[i]) {cudaError_t err = cudaFree(sf->rmine_d[i]);CHKERRCUDA(err);sf->rmine_d[i]=NULL;}}
  }
#endif
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFSetType - Set the PetscSF communication implementation

   Collective on PetscSF

   Input Parameters:
+  sf - the PetscSF context
-  type - a known method

   Options Database Key:
.  -sf_type <type> - Sets the method; use -help for a list
   of available methods (for instance, window, basic, neighbor)

   Notes:
   See "include/petscsf.h" for available methods (for instance)
+    PETSCSFWINDOW - MPI-2/3 one-sided
-    PETSCSFBASIC - basic implementation using MPI-1 two-sided

   Level: intermediate

.seealso: PetscSFType, PetscSFCreate()
@*/
PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
{
  PetscErrorCode ierr,(*r)(PetscSF);
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidCharPointer(type,2);

  ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
  if (match) PetscFunctionReturn(0);

  ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
  if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
  /* Destroy the previous PetscSF implementation context */
  if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
  /* Wipe the ops table so the new type's constructor starts from a clean slate */
  ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
  ierr = (*r)(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetType - Get the PetscSF communication implementation

   Not Collective

   Input Parameter:
.  sf - the PetscSF context

   Output Parameter:
.
 type - the PetscSF type name

   Level: intermediate

.seealso: PetscSFSetType(), PetscSFCreate()
@*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
  PetscValidPointer(type,2);
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}

/*@
   PetscSFDestroy - destroy star forest

   Collective

   Input Arguments:
.  sf - address of star forest

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFReset()
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Only the last reference actually tears the object down */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Validate every edge of the graph (rank in [0,size), root index >= 0, leaf location >= 0).
   A no-op unless the graph is set and PETSc was configured with debugging. */
static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
{
  PetscInt          i, nleaves;
  PetscMPIInt       size;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
  ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  for (i = 0; i < nleaves; i++) {
    const PetscInt rank   = iremote[i].rank;
    const PetscInt remote = iremote[i].index;
    const PetscInt leaf   = ilocal ? ilocal[i] : i; /* NULL ilocal means contiguous leaf storage */
    if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
    if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
    if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetUp - set up communication structures

   Collective

   Input Arguments:
.  sf - star forest communication object

   Level: beginner

.seealso: PetscSFSetFromOptions(), PetscSFSetType()
@*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (sf->setupcalled) PetscFunctionReturn(0);  /* idempotent */
  ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr);
  sf->use_gpu_aware_mpi = use_gpu_aware_mpi;
  /* Default to the basic implementation if the user never chose a type */
  if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);}
  ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
  ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetFromOptions - set PetscSF options using the options database

   Logically Collective

   Input Arguments:
.  sf - star forest

   Options Database Keys:
+  -sf_type               - implementation type, see PetscSFSetType()
.  -sf_rank_order         - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
.  -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream.
SF will also
     use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
     If true, this option only works with -use_cuda_aware_mpi 1.
-  -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
     If true, this option only works with -use_cuda_aware_mpi 1.

   Level: intermediate
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
  /* If a type is already set it is the default shown in -help */
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);

#if defined(PETSC_HAVE_CUDA)
  sf->use_default_stream = PETSC_TRUE; /* The assumption is true for PETSc internal use of SF */
  ierr = PetscOptionsBool("-sf_use_default_stream","Assume SF's input and output root/leafdata is computed on the default stream","PetscSFSetFromOptions",sf->use_default_stream,&sf->use_default_stream,NULL);CHKERRQ(ierr);
  sf->use_stream_aware_mpi = PETSC_FALSE;
  ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr);
#endif
  /* Give the implementation a chance to register its own options */
  if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

   Logically Collective

   Input Arguments:
+  sf - star forest
-  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

   Level: advanced

.seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  /* The multi-SF is built lazily; once it exists the ordering is baked in */
  if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}

/*@
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Arguments:
+  sf - star forest
.
 nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
during setup in debug mode)
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
during setup in debug mode)
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
   In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode

   Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
   encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
   needed

   Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
   indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).

.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
  if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    ierr = PetscSFReset(sf);CHKERRQ(ierr);
  }

  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  if (nleaves && ilocal) {
    /* Scan leaf locations for the range and detect the identity permutation,
       which is stored implicitly as ilocal == NULL */
    PetscInt i;
    PetscInt minleaf = PETSC_MAX_INT;
    PetscInt maxleaf = PETSC_MIN_INT;
    int      contiguous = 1;
    for (i=0; i<nleaves; i++) {
      minleaf = PetscMin(minleaf,ilocal[i]);
      maxleaf = PetscMax(maxleaf,ilocal[i]);
      contiguous &= (ilocal[i] == i);
    }
    sf->minleaf = minleaf;
    sf->maxleaf = maxleaf;
    if (contiguous) {
      if (localmode == PETSC_OWN_POINTER) {
        /* We own the array but no longer need it */
        ierr = PetscFree(ilocal);CHKERRQ(ierr);
      }
      ilocal = NULL;
    }
  } else {
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
  }

  if (ilocal) {
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
      sf->mine = sf->mine_alloc;
      break;
    case PETSC_OWN_POINTER:
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      /* Caller retains ownership; mine_alloc stays NULL so PetscSFReset() will not free it */
      sf->mine_alloc = NULL;
      sf->mine       = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }

  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    /* Caller retains ownership; remote_alloc stays NULL so PetscSFReset() will not free it */
    sf->remote_alloc = NULL;
    sf->remote       = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
  PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

  Collective

  Input Parameters:
+ sf      - The PetscSF
. map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
- pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

  Notes:
  It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
  n and N are local and global sizes of x respectively.

  With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
  sequential vectors y on all ranks.

  With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
  sequential vector y on rank 0.

  In above cases, entries of x are roots and entries of y are leaves.

  With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
  creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
  of rank j.
Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
  not mean one can not send multiple items. One just needs to create a new MPI datatype for the multiple data
  items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

  In this case, roots and leaves are symmetric.

  Level: intermediate
 @*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* Build a trivial layout: every rank owns 'size' roots */
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots  = n;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves in a gather */
    sf->nleaves = rank ? 0 : N;
    sf->nroots  = n;
    sf->nranks  = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves = size;
    sf->nroots  = size;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/*@
   PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

   Collective

   Input Arguments:

.  sf - star forest to invert

   Output Arguments:
.  isf - inverse of sf
   Level: advanced

   Notes:
   All roots must have degree 1.

   The local space may be a permutation, but cannot be sparse.

.seealso: PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  /* Each leaf advertises its own (rank,index); reducing over sf deposits that
     address at the corresponding root, which is exactly the inverse edge */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* (-1,-1) marks roots that receive nothing */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL;
  else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Arguments:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Arguments:
.  newsf - new communication object

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      /* Patterned graphs carry no explicit edge lists; rebuild from the pattern */
      ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
  /* Let the implementation copy its private data */
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGraph - Get the graph specifying a parallel star forest

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers
-  iremote - remote locations of root vertices for each leaf on the current process

   Notes:
   We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet

   When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you
   want to update the graph, you must call PetscSFSetGraph after modifying the iremote array.

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->GetGraph) {
    ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
  } else {
    if (nroots) *nroots = sf->nroots;
    if (nleaves) *nleaves = sf->nleaves;
    if (ilocal) *ilocal = sf->mine;
    if (iremote) *iremote = sf->remote;
  }
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetLeafRange - Get the active leaf ranges

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  minleaf - minimum active leaf on this process.
Return 0 if there are no leaves.
-  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.

   Level: developer

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFViewFromOptions - View from Options

   Collective on PetscSF

   Input Parameters:
+  A - the star forest
.  obj - Optional object
-  name - command line option

   Level: intermediate
.seealso:  PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
@*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
  ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFView - view a star forest

   Collective

   Input Arguments:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Viewing forces setup so that rank data below is available */
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        /* Sort a scratch copy of the ranks so output is ordered; sf->ranks stays untouched */
        PetscMPIInt *tmpranks,*perm;
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
.  roffset - offset in rmine/rremote for each rank (length nranks+1)
.
rmine - concatenated array holding local indices referencing each remote rank 827 - rremote - concatenated array holding remote indices referenced for each remote rank 828 829 Level: developer 830 831 .seealso: PetscSFGetLeafRanks() 832 @*/ 833 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote) 834 { 835 PetscErrorCode ierr; 836 837 PetscFunctionBegin; 838 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 839 if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 840 if (sf->ops->GetRootRanks) { 841 ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr); 842 } else { 843 /* The generic implementation */ 844 if (nranks) *nranks = sf->nranks; 845 if (ranks) *ranks = sf->ranks; 846 if (roffset) *roffset = sf->roffset; 847 if (rmine) *rmine = sf->rmine; 848 if (rremote) *rremote = sf->rremote; 849 } 850 PetscFunctionReturn(0); 851 } 852 853 /*@C 854 PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 855 856 Not Collective 857 858 Input Arguments: 859 . sf - star forest 860 861 Output Arguments: 862 + niranks - number of leaf ranks referencing roots on this process 863 . iranks - array of ranks 864 . 
ioffset - offset in irootloc for each rank (length niranks+1) 865 - irootloc - concatenated array holding local indices of roots referenced by each leaf rank 866 867 Level: developer 868 869 .seealso: PetscSFGetRootRanks() 870 @*/ 871 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 872 { 873 PetscErrorCode ierr; 874 875 PetscFunctionBegin; 876 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 877 if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 878 if (sf->ops->GetLeafRanks) { 879 ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr); 880 } else { 881 PetscSFType type; 882 ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 883 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 884 } 885 PetscFunctionReturn(0); 886 } 887 888 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) { 889 PetscInt i; 890 for (i=0; i<n; i++) { 891 if (needle == list[i]) return PETSC_TRUE; 892 } 893 return PETSC_FALSE; 894 } 895 896 /*@C 897 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations. 

   Collective

   Input Arguments:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  /* Count leaves per remote rank: table maps (rank+1) -> number of edges to that rank */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  /* rmine/rremote are sized by nleaves: every leaf lands in exactly one per-rank bucket */
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--; /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group   group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate the distinguished group's members into this communicator's rank numbering */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition independently so that within a partition ranks are increasing */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* Prefix-sum the per-rank counts into roffset[]; rcount[] is reset and reused as a write cursor below */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;
  }
  /* Bucket each leaf by remote rank, caching the last rank lookup since consecutive leaves often share a rank */
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks: first among distinguished, then the rest */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Argument:
.  sf - star forest

   Output Arguments:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks: build an auxiliary SF with a single root per
       process and one leaf per remote rank; the root degree is then the incoming count */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
    /* Enumerate the incoming ranks: each process gathers the rank ids of those referencing it */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* The outgoing group is simply the cached, sorted list of referenced ranks */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}

/*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Argument:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Arguments:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.

.seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
    ierr   = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    *multi = sf->multi;
    PetscFunctionReturn(0);
  }
  if (!sf->multi) { /* Lazily build and cache the multi-SF on first request */
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    /* inoffset[i] is the index of the first multi-root that belongs to original root i */
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Fetch-and-add of ones hands each leaf a distinct slot index at its root */
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
      for (i=0; i<sf->nroots; i++) {
        if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
      }
    }
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) { /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      /* Reduce every leaf's owning rank to its multi-root so each root learns who feeds each slot */
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell each leaf its rank-ordered slot and rebuild the graph with those indices */
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
    }
    ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices

   Collective

   Input Arguments:
+  sf - original star forest
.  nselected  - number of selected roots on this process
-  selected   - indices of the selected roots on this process

   Output Arguments:
.  esf - new star forest

   Level: advanced

   Note:
   To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
   be done by calling PetscSFGetGraph().

.seealso: PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
{
  PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
  const PetscInt    *ilocal;
  signed char       *rootdata,*leafdata,*leafmem;
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  MPI_Comm          comm;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (nselected) PetscValidPointer(selected,3);
  PetscValidPointer(esf,4);

  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);

  if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of
range indices */ 1199 1200 PetscBool dups; 1201 ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr); 1202 if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups"); 1203 for (i=0; i<nselected; i++) 1204 if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %D is out of [0,%D)",selected[i],nroots); 1205 } 1206 1207 if (sf->ops->CreateEmbeddedSF) { 1208 ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,selected,esf);CHKERRQ(ierr); 1209 } else { 1210 /* A generic version of creating embedded sf */ 1211 ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 1212 maxlocal = maxleaf - minleaf + 1; 1213 ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr); 1214 leafdata = leafmem - minleaf; 1215 /* Tag selected roots and bcast to leaves */ 1216 for (i=0; i<nselected; i++) rootdata[selected[i]] = 1; 1217 ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr); 1218 ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr); 1219 1220 /* Build esf with leaves that are still connected */ 1221 esf_nleaves = 0; 1222 for (i=0; i<nleaves; i++) { 1223 j = ilocal ? ilocal[i] : i; 1224 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1225 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1226 */ 1227 esf_nleaves += (leafdata[j] ? 1 : 0); 1228 } 1229 ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr); 1230 ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr); 1231 for (i=n=0; i<nleaves; i++) { 1232 j = ilocal ? 
ilocal[i] : i; 1233 if (leafdata[j]) { 1234 new_ilocal[n] = j; 1235 new_iremote[n].rank = iremote[i].rank; 1236 new_iremote[n].index = iremote[i].index; 1237 ++n; 1238 } 1239 } 1240 ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr); 1241 ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr); 1242 ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1243 ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr); 1244 } 1245 ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1246 PetscFunctionReturn(0); 1247 } 1248 1249 /*@C 1250 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 1251 1252 Collective 1253 1254 Input Arguments: 1255 + sf - original star forest 1256 . nselected - number of selected leaves on this process 1257 - selected - indices of the selected leaves on this process 1258 1259 Output Arguments: 1260 . newsf - new star forest 1261 1262 Level: advanced 1263 1264 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph() 1265 @*/ 1266 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 1267 { 1268 const PetscSFNode *iremote; 1269 PetscSFNode *new_iremote; 1270 const PetscInt *ilocal; 1271 PetscInt i,nroots,*leaves,*new_ilocal; 1272 MPI_Comm comm; 1273 PetscErrorCode ierr; 1274 1275 PetscFunctionBegin; 1276 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1277 PetscSFCheckGraphSet(sf,1); 1278 if (nselected) PetscValidPointer(selected,3); 1279 PetscValidPointer(newsf,4); 1280 1281 /* Uniq selected[] and put results in leaves[] */ 1282 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1283 ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr); 1284 ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr); 1285 ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr); 1286 if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) 
SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves); 1287 1288 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1289 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) { 1290 ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr); 1291 } else { 1292 ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr); 1293 ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr); 1294 ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr); 1295 for (i=0; i<nselected; ++i) { 1296 const PetscInt l = leaves[i]; 1297 new_ilocal[i] = ilocal ? ilocal[l] : l; 1298 new_iremote[i].rank = iremote[l].rank; 1299 new_iremote[i].index = iremote[l].index; 1300 } 1301 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr); 1302 ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1303 } 1304 ierr = PetscFree(leaves);CHKERRQ(ierr); 1305 PetscFunctionReturn(0); 1306 } 1307 1308 /*@C 1309 PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd() 1310 1311 Collective on PetscSF 1312 1313 Input Arguments: 1314 + sf - star forest on which to communicate 1315 . unit - data type associated with each node 1316 . rootdata - buffer to broadcast 1317 - op - operation to use for reduction 1318 1319 Output Arguments: 1320 . 
leafdata - buffer to be reduced with values from each leaf's respective root 1321 1322 Level: intermediate 1323 1324 .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd() 1325 @*/ 1326 PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1327 { 1328 PetscErrorCode ierr; 1329 PetscMemType rootmtype,leafmtype; 1330 1331 PetscFunctionBegin; 1332 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1333 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1334 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1335 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1336 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1337 ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 1338 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 1342 /*@C 1343 PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin() 1344 1345 Collective 1346 1347 Input Arguments: 1348 + sf - star forest 1349 . unit - data type 1350 . rootdata - buffer to broadcast 1351 - op - operation to use for reduction 1352 1353 Output Arguments: 1354 . 
leafdata - buffer to be reduced with values from each leaf's respective root 1355 1356 Level: intermediate 1357 1358 .seealso: PetscSFSetGraph(), PetscSFReduceEnd() 1359 @*/ 1360 PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 1361 { 1362 PetscErrorCode ierr; 1363 1364 PetscFunctionBegin; 1365 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1366 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1367 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1368 ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr); 1369 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1370 PetscFunctionReturn(0); 1371 } 1372 1373 /*@C 1374 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 1375 1376 Collective 1377 1378 Input Arguments: 1379 + sf - star forest 1380 . unit - data type 1381 . leafdata - values to reduce 1382 - op - reduction operation 1383 1384 Output Arguments: 1385 . 
rootdata - result of reduction of values from all leaves of each root 1386 1387 Level: intermediate 1388 1389 .seealso: PetscSFBcastBegin() 1390 @*/ 1391 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1392 { 1393 PetscErrorCode ierr; 1394 PetscMemType rootmtype,leafmtype; 1395 1396 PetscFunctionBegin; 1397 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1398 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1399 ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1400 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1401 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1402 ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr); 1403 ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1404 PetscFunctionReturn(0); 1405 } 1406 1407 /*@C 1408 PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin() 1409 1410 Collective 1411 1412 Input Arguments: 1413 + sf - star forest 1414 . unit - data type 1415 . leafdata - values to reduce 1416 - op - reduction operation 1417 1418 Output Arguments: 1419 . 
rootdata - result of reduction of values from all leaves of each root 1420 1421 Level: intermediate 1422 1423 .seealso: PetscSFSetGraph(), PetscSFBcastEnd() 1424 @*/ 1425 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 1426 { 1427 PetscErrorCode ierr; 1428 1429 PetscFunctionBegin; 1430 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1431 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1432 ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr); 1433 ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr); 1434 ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr); 1435 PetscFunctionReturn(0); 1436 } 1437 1438 /*@C 1439 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd() 1440 1441 Collective 1442 1443 Input Arguments: 1444 + sf - star forest 1445 . unit - data type 1446 . leafdata - leaf values to use in reduction 1447 - op - operation to use for reduction 1448 1449 Output Arguments: 1450 + rootdata - root values to be updated, input state is seen by first process to perform an update 1451 - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1452 1453 Level: advanced 1454 1455 Note: 1456 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1457 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1458 not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1459 integers. 

.seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype,leafupdatemtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
  /* leafdata and leafupdate are handled by a single pack/unpack pass, so mixed memory spaces are unsupported */
  if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
  ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Arguments:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    /* Degrees are computed by reducing a vector of ones from the leaves onto the roots */
    PetscInt i, nroots = sf->nroots, maxlocal;
    if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf-sf->minleaf+1;
    ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* degreetmp is shifted by minleaf so the reduce can index it by global leaf number */
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  /* The array is not valid until PetscSFComputeDegreeEnd() completes the reduction */
  *degree = NULL;
  PetscFunctionReturn(0);
}

/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso:
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    /* Cache the result so subsequent Begin/End pairs return immediately */
    sf->degreeknown = PETSC_TRUE;
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}


/*@C
   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
   Each multi-root is assigned index of the corresponding original root.

   Collective

   Input Arguments:
+  sf - star forest
-  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

   Output Arguments:
+  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
-  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

   Level: developer

   Notes:
   The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1608 1609 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF() 1610 @*/ 1611 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1612 { 1613 PetscSF msf; 1614 PetscInt i, j, k, nroots, nmroots; 1615 PetscErrorCode ierr; 1616 1617 PetscFunctionBegin; 1618 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1619 ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 1620 if (nroots) PetscValidIntPointer(degree,2); 1621 if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3); 1622 PetscValidPointer(multiRootsOrigNumbering,4); 1623 ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr); 1624 ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr); 1625 ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr); 1626 for (i=0,j=0,k=0; i<nroots; i++) { 1627 if (!degree[i]) continue; 1628 for (j=0; j<degree[i]; j++,k++) { 1629 (*multiRootsOrigNumbering)[k] = i; 1630 } 1631 } 1632 if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail"); 1633 if (nMultiRoots) *nMultiRoots = nmroots; 1634 PetscFunctionReturn(0); 1635 } 1636 1637 /*@C 1638 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 1639 1640 Collective 1641 1642 Input Arguments: 1643 + sf - star forest 1644 . unit - data type 1645 - leafdata - leaf data to gather to roots 1646 1647 Output Argument: 1648 . 
multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1649 1650 Level: intermediate 1651 1652 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 1653 @*/ 1654 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1655 { 1656 PetscErrorCode ierr; 1657 PetscSF multi = NULL; 1658 1659 PetscFunctionBegin; 1660 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1661 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1662 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1663 ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 1664 PetscFunctionReturn(0); 1665 } 1666 1667 /*@C 1668 PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin() 1669 1670 Collective 1671 1672 Input Arguments: 1673 + sf - star forest 1674 . unit - data type 1675 - leafdata - leaf data to gather to roots 1676 1677 Output Argument: 1678 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 1679 1680 Level: intermediate 1681 1682 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1683 @*/ 1684 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 1685 { 1686 PetscErrorCode ierr; 1687 PetscSF multi = NULL; 1688 1689 PetscFunctionBegin; 1690 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1691 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1692 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1693 ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 1694 PetscFunctionReturn(0); 1695 } 1696 1697 /*@C 1698 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd() 1699 1700 Collective 1701 1702 Input Arguments: 1703 + sf - star forest 1704 . 
unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        msf = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* A scatter is a broadcast over the multi-SF: each multi-root holds the unit destined for one leaf */
  ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(msf,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.
leafdata - leaf data to be update with personal data from each respective root 1739 1740 Level: intermediate 1741 1742 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 1743 @*/ 1744 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 1745 { 1746 PetscErrorCode ierr; 1747 PetscSF multi = NULL; 1748 1749 PetscFunctionBegin; 1750 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1751 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1752 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 1753 ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr); 1754 PetscFunctionReturn(0); 1755 } 1756 1757 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 1758 { 1759 PetscInt i, n, nleaves; 1760 const PetscInt *ilocal = NULL; 1761 PetscHSetI seen; 1762 PetscErrorCode ierr; 1763 1764 PetscFunctionBegin; 1765 if (!PetscDefined(USE_DEBUG)) PetscFunctionReturn(0); 1766 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 1767 ierr = PetscHSetICreate(&seen);CHKERRQ(ierr); 1768 for (i = 0; i < nleaves; i++) { 1769 const PetscInt leaf = ilocal ? ilocal[i] : i; 1770 ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr); 1771 } 1772 ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr); 1773 if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique"); 1774 ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr); 1775 PetscFunctionReturn(0); 1776 } 1777 1778 /*@ 1779 PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view 1780 1781 Input Parameters: 1782 + sfA - The first PetscSF 1783 - sfB - The second PetscSF 1784 1785 Output Parameters: 1786 . sfBA - The composite SF 1787 1788 Level: developer 1789 1790 Notes: 1791 Currently, the two SFs must be defined on congruent communicators and they must be true star 1792 forests, i.e. the same leaf is not connected with different roots. 

  sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
  nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
  Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  /* Composition assumes true star forests: each leaf appears once (checked in debug builds only) */
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  if (localPointsA) {
    /* Re-index sfA's remote points into a dense array indexed by sfB's root numbers, so we
       can broadcast them through sfB below. Slots with no matching sfA leaf get rank/index -1,
       which marks "no path" for the compaction pass later. */
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* sfA leaf outside sfB's root space: no path */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  /* leafdataB-minleaf shifts the buffer so it can be addressed by sfB's global leaf numbers;
     each (rank,index) pair travels as one MPIU_2INT */
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr); /* no-op when localPointsA was NULL */

  /* denseB: all of sfB's leaves are connected and contiguous (localPointsB==NULL), so the
     composite can omit its ilocal array; any rank==-1 entry means a disconnected leaf */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    /* Ownership of leafdataB transfers to the new SF via PETSC_OWN_POINTER below */
    localPointsBA  = NULL;
    remotePointsBA = leafdataB;
  } else {
    /* Compact: keep only connected leaves, recording their local numbers explicitly */
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue; /* leaf has no path back to an sfA root */
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF.

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots.
Even more, all roots of the
  second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

  sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
  roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
  a Reduce on sfB, on connected roots.

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op            op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool         iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  /* Both inputs must be true star forests: each leaf appears once (checked in debug builds only) */
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPIU_REPLACE. In that case, MPI_MAXLOC and MPIU_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr);
  if (iswin) op = MPIU_REPLACE;
#endif

  /* Stage sfA's remote points in a buffer indexed by sfB's leaf numbers (shifted by minleaf);
     slots with no matching sfA leaf keep rank/index -1 as a "disconnected" marker */
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; /* sfA leaf outside sfB's leaf range */
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* localPointsA==NULL: sfA's leaves are the contiguous range 0..numLeavesA-1 */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  for (i=0; i<numRootsB; i++) {
    /* Roots of sfB that receive no contribution keep rank -1 and are dropped below */
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Pull sfA's remote points up through the inverse of sfB: leaves reduce onto sfB's roots;
     the -minleaf shift lets the leaf buffer be addressed by global leaf numbers */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* Compact in place: keep only connected roots, recording their original indices as leaves of sfBA */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
. out - The local PetscSF
*/
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm          comm;
  PetscMPIInt       myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt          i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode       *liremote;
  PetscSF           lsf;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->CreateLocalSF) {
    /* The implementation may provide a specialized (presumably faster) version */
    ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
  } else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr);

    /* Find out local edges and build a local SF */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
    /* First pass: count intra-process edges (root lives on this rank) to size the arrays */
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);

    /* Second pass: copy the local edges, remapping the root rank to 0 of PETSC_COMM_SELF */
    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
    *out = lsf;
  }
  PetscFunctionReturn(0);
}

/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Detect whether the buffers live on host or device so the implementation can pick a path */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  if (sf->ops->BcastToZero) {
    ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  /* NOTE(review): this ends the same *Begin* event that was started above (the whole operation
     is synchronous here) -- confirm this is the intended logging granularity */
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
