#include <../src/vec/is/sf/impls/basic/sfbasic.h>
#include <../src/vec/is/sf/impls/basic/sfpack.h>
#include <petsc/private/viewerimpl.h>

/*===================================================================================*/
/*              SF public interface implementations                                  */
/*===================================================================================*/

/* Set up the basic SF: establish the two-sided communication pattern.

   On entry, PetscSFSetUpRanks() has determined the outgoing side (which remote
   root ranks this process's leaves reference, via sf->ranks/roffset/rremote).
   This routine then informs every root rank which processes will send to it and
   how much (a rendezvous via PetscCommBuildTwoSided), builds the incoming-side
   arrays bas->iranks/ioffset/irootloc, and exchanges the actual root indices
   referenced by remote leaves.  "Distinguished" (d-) ranks are self/shared-memory
   ranks handled by local copy instead of MPI messages.
*/
PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
{
  PetscErrorCode ierr;
  PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
  PetscInt       *rlengths,*ilengths,i,nRemoteRootRanks,nRemoteLeafRanks;
  PetscMPIInt    rank,niranks,*iranks,tag;
  MPI_Comm       comm;
  MPI_Group      group;
  MPI_Request    *rootreqs,*leafreqs;

  PetscFunctionBegin;
  /* Passing the self group means PetscSFSetUpRanks treats only this process itself
     as distinguished -- NOTE(review): presumably; confirm against PetscSFSetUpRanks docs */
  ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRMPI(ierr);
  ierr = PetscSFSetUpRanks(sf,group);CHKERRQ(ierr);
  ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)sf,&tag);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  /*
   * Inform roots about how many leaves and from which ranks
   */
  ierr = PetscMalloc1(sf->nranks,&rlengths);CHKERRQ(ierr);
  /* Determine number, sending ranks and length of incoming */
  for (i=0; i<sf->nranks; i++) {
    rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */
  }
  nRemoteRootRanks = sf->nranks-sf->ndranks;
  /* Rendezvous: each process announces its per-root-rank leaf counts; on return,
     iranks[]/ilengths[] (niranks entries, malloc'ed) describe who will send to me */
  ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,nRemoteRootRanks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);CHKERRQ(ierr);

  /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why.
     We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is
     small and the sorting is cheap.
   */
  ierr = PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);CHKERRQ(ierr);

  /* Partition into distinguished and non-distinguished incoming ranks */
  bas->ndiranks = sf->ndranks;
  bas->niranks  = bas->ndiranks + niranks;
  ierr = PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);CHKERRQ(ierr);
  bas->ioffset[0] = 0;
  /* Distinguished incoming ranks come first; their lengths were computed locally (rlengths) */
  for (i=0; i<bas->ndiranks; i++) {
    bas->iranks[i]    = sf->ranks[i];
    bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i];
  }
  /* With the self group above, at most one distinguished rank (this process itself) is legal */
  if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks");
  /* Remaining incoming ranks/lengths came from the two-sided exchange */
  for (; i<bas->niranks; i++) {
    bas->iranks[i]    = iranks[i-bas->ndiranks];
    bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks];
  }
  bas->itotal = bas->ioffset[i]; /* total number of incoming root references */
  ierr = PetscFree(rlengths);CHKERRQ(ierr);
  ierr = PetscFree(iranks);CHKERRQ(ierr);
  ierr = PetscFree(ilengths);CHKERRQ(ierr);

  /* Send leaf identities to roots */
  nRemoteLeafRanks = bas->niranks-bas->ndiranks;
  ierr = PetscMalloc1(bas->itotal,&bas->irootloc);CHKERRQ(ierr);
  ierr = PetscMalloc2(nRemoteLeafRanks,&rootreqs,nRemoteRootRanks,&leafreqs);CHKERRQ(ierr);
  /* Post receives for the root indices each remote leaf rank will reference */
  for (i=bas->ndiranks; i<bas->niranks; i++) {
    ierr = MPIU_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);CHKERRMPI(ierr);
  }
  for (i=0; i<sf->nranks; i++) {
    PetscInt npoints = sf->roffset[i+1] - sf->roffset[i];
    if (i < sf->ndranks) {
      /* Distinguished rank: sanity-check it is self, then copy locally instead of messaging */
      if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank");
      if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank");
      if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths");
      ierr = PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);CHKERRQ(ierr);
      continue;
    }
    ierr = MPIU_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);CHKERRMPI(ierr);
  }
  ierr = MPI_Waitall(nRemoteLeafRanks,rootreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
  ierr = MPI_Waitall(nRemoteRootRanks,leafreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);

  sf->nleafreqs  = nRemoteRootRanks;
  bas->nrootreqs = nRemoteLeafRanks;
  sf->persistent = PETSC_TRUE; /* basic SF can reuse persistent MPI requests across operations */

  /* Setup fields related to packing, such as rootbuflen[] */
  ierr = PetscSFSetUpPackFields(sf);CHKERRQ(ierr);
  ierr = PetscFree2(rootreqs,leafreqs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Release all communication state built by PetscSFSetUp_Basic (rank tables, root
   locations, device mirrors, cached links); errors out if an operation is in flight. */
PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
{
  PetscErrorCode ierr;
  PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
  PetscSFLink    link = bas->avail,next;

  PetscFunctionBegin;
  if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
  ierr = PetscFree2(bas->iranks,bas->ioffset);CHKERRQ(ierr);
  ierr = PetscFree(bas->irootloc);CHKERRQ(ierr);

#if defined(PETSC_HAVE_DEVICE)
  /* Free both device copies of irootloc -- NOTE(review): the two slots presumably
     correspond to the two device-memtype variants declared in sfbasic.h; confirm */
  for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,bas->irootloc_d[i]);CHKERRQ(ierr);}
#endif

#if defined(PETSC_HAVE_NVSHMEM)
  ierr = PetscSFReset_Basic_NVSHMEM(sf);CHKERRQ(ierr);
#endif

  /* Destroy all cached (available) communication links */
  for (; link; link=next) {next = link->next; ierr = PetscSFLinkDestroy(sf,link);CHKERRQ(ierr);}
  bas->avail = NULL;
  ierr = PetscSFResetPackFields(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destroy = Reset + free the type-specific data structure */
PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSFReset_Basic(sf);CHKERRQ(ierr);
  ierr = PetscFree(sf->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_USE_SINGLE_LIBRARY)
#include <petscmat.h>

/* Dump the SF's communication pattern as a size x size AIJ matrix: entry (i,j) is
   the number of bytes rank i sends to rank j (one PetscScalar assumed per vertex).
   Only compiled in single-library builds because it pulls in the Mat package. */
PETSC_INTERN PetscErrorCode PetscSFView_Basic_PatternAndSizes(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
  PetscInt          i,nrootranks,ndrootranks;
  const PetscInt    *rootoffset;
  PetscMPIInt       rank,size;
  const PetscMPIInt *rootranks;
  MPI_Comm          comm = PetscObjectComm((PetscObject)sf);
  PetscScalar       unitbytes;
  Mat               A;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  /* PetscSFView is most useful for the SF used in VecScatterBegin/End in MatMult etc, where we do
     PetscSFBcast, i.e., roots send data to leaves. We dump the communication pattern into a matrix
     in senders' view point: how many bytes I will send to my neighbors.

     Looking at a column of the matrix, one can also know how many bytes the rank will receive from others.

     If PetscSFLink bas->inuse is available, we can use that to get tree vertex size. But that would give
     different interpretations for the same SF for different data types. Since we most care about VecScatter,
     we uniformly treat each vertex as a PetscScalar.
  */
  unitbytes = (PetscScalar)sizeof(PetscScalar);

  ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,NULL);CHKERRQ(ierr);
  /* One row per rank; preallocate 1 diagonal and nrootranks-ndrootranks off-diagonal entries */
  ierr = MatCreateAIJ(comm,1,1,size,size,1,NULL,nrootranks-ndrootranks,NULL,&A);CHKERRQ(ierr);
  ierr = MatSetOptionsPrefix(A,"__petsc_internal__");CHKERRQ(ierr); /* To prevent the internal A from taking any command line options */
  /* NOTE(review): bas->iranks is used as the column index; presumably identical to
     rootranks returned above -- confirm against PetscSFGetRootInfo_Basic */
  for (i=0; i<nrootranks; i++) {
    ierr = MatSetValue(A,(PetscInt)rank,bas->iranks[i],(rootoffset[i+1]-rootoffset[i])*unitbytes,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatView(A,viewer);CHKERRQ(ierr);
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

/* View a basic SF: brief ASCII summary by default; pattern-and-sizes matrix for
   ASCII-Matlab, draw, or binary viewers (single-library builds only). */
PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isascii;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
  if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {ierr = PetscViewerASCIIPrintf(viewer,"  MultiSF sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);}
#if defined(PETSC_USE_SINGLE_LIBRARY)
  else {
    PetscBool isdraw,isbinary;
    ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
    ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
    if ((isascii && viewer->format == PETSC_VIEWER_ASCII_MATLAB) || isdraw || isbinary) {
      ierr = PetscSFView_Basic_PatternAndSizes(sf,viewer);CHKERRQ(ierr);
    }
  }
#endif
  PetscFunctionReturn(0);
}

/* root -> leaf broadcast, begin phase: pack remote root data, start MPI, then do the
   self-to-self part locally so it overlaps with the remote communication. */
static PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscSFLink    link = NULL;

  PetscFunctionBegin;
  /* Create a communication link, which provides buffers, MPI requests etc (if MPI is used) */
  ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link);CHKERRQ(ierr);
  /* Pack rootdata to rootbuf for remote communication */
  ierr = PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata);CHKERRQ(ierr);
  /* Start communcation, e.g., post MPI_Isend */
  ierr = PetscSFLinkStartCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
  /* Do local scatter (i.e., self to self communication), which overlaps with the remote communication above */
  ierr = PetscSFLinkScatterLocal(sf,link,PETSCSF_ROOT2LEAF,(void*)rootdata,leafdata,op);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* root -> leaf broadcast, end phase: wait for MPI, unpack into leafdata, recycle link */
PETSC_INTERN PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscSFLink    link = NULL;

  PetscFunctionBegin;
  /* Retrieve the link used in XxxBegin() with root/leafdata as key */
  ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
  /* Finish remote communication, e.g., post MPI_Waitall */
  ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
  /* Unpack data in leafbuf to leafdata for remote communication */
  ierr = PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafdata,op);CHKERRQ(ierr);
  /* Recycle the link */
  ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Shared by ReduceBegin and FetchAndOpBegin */
PETSC_STATIC_INLINE PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op,PetscSFOperation sfop,PetscSFLink *out)
{
  PetscErrorCode ierr;
  PetscSFLink    link = NULL;

  PetscFunctionBegin;
  /* Create link, pack remote leaf data, start leaf->root communication; the caller
     receives the link (*out) to continue with its operation-specific local work */
  ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,sfop,&link);CHKERRQ(ierr);
  ierr = PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata);CHKERRQ(ierr);
  ierr = PetscSFLinkStartCommunication(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr);
  *out = link;
  PetscFunctionReturn(0);
}

/* leaf -> root with reduction */
static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscSFLink    link = NULL;

  PetscFunctionBegin;
  ierr = PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_REDUCE,&link);CHKERRQ(ierr);
  /* Local leaf->root scatter overlaps with the remote communication started above */
  ierr = PetscSFLinkScatterLocal(sf,link,PETSCSF_LEAF2ROOT,rootdata,(void*)leafdata,op);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* leaf -> root reduction, end phase: wait for MPI, reduce rootbuf into rootdata with op */
PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscSFLink    link = NULL;

  PetscFunctionBegin;
  ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
  ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr);
  ierr = PetscSFLinkUnpackRootData(sf,link,PETSCSF_REMOTE,rootdata,op);CHKERRQ(ierr);
  ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Fetch-and-op, begin phase: start leaf->root communication, then do the local
   (self) fetch-and-op which overlaps with it. */
PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscSFLink    link = NULL;

  PetscFunctionBegin;
  ierr = PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_FETCH,&link);CHKERRQ(ierr);
  ierr = PetscSFLinkFetchAndOpLocal(sf,link,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Fetch-and-op, end phase: finish leaf->root, apply remote fetch-and-op at roots,
   then broadcast the pre-update root values back and insert them into leafupdate. */
static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscSFLink    link = NULL;

  PetscFunctionBegin;
  ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
  /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
  ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr);
  /* Do fetch-and-op, the (remote) update results are in rootbuf */
  ierr = PetscSFLinkFetchAndOpRemote(sf,link,rootdata,op);CHKERRQ(ierr);
  /* Bcast rootbuf to leafupdate */
  ierr = PetscSFLinkStartCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
  ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
  /* Unpack and insert fetched data into leaves */
  ierr = PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafupdate,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Return (read-only views of) the incoming-side rank tables built in SetUp: which
   leaf ranks send to this process's roots and which local roots they reference.
   Any output pointer may be NULL if the caller does not need it. */
PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
{
  PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;

  PetscFunctionBegin;
  if (niranks)  *niranks  = bas->niranks;
  if (iranks)   *iranks   = bas->iranks;
  if (ioffset)  *ioffset  = bas->ioffset;
  if (irootloc) *irootloc = bas->irootloc;
  PetscFunctionReturn(0);
}

/* An optimized PetscSFCreateEmbeddedRootSF. We aggresively make use of the established communication on sf.
   We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[]
   was sorted before calling the routine.
 */
PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedRootSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  PetscSF           esf;
  PetscInt          esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote;
  PetscInt          i,j,p,q,nroots,esf_nleaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal;
  char              *rootdata,*leafdata,*leafmem; /* Only stores 0 or 1, so we can save memory with char */
  PetscMPIInt       *esf_ranks;
  const PetscMPIInt *ranks,*iranks;
  const PetscInt    *roffset,*rmine,*rremote,*ioffset,*irootloc;
  PetscBool         connected;
  PetscSFNode       *new_iremote;
  PetscSF_Basic     *bas;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(esf);CHKERRQ(ierr);
  ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */

  /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */
  ierr = PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
  maxlocal = maxleaf - minleaf + 1;
  ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr);
  /* Shift so leafdata can be indexed directly by leaf index (leafmem covers [minleaf,maxleaf]) */
  leafdata = leafmem - minleaf;
  /* Tag selected roots */
  for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1;

  ierr = PetscSFBcastBegin(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);

  ierr = PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr); /* Get send info */
  /* First pass: count surviving leaves and still-connected root ranks */
  esf_nranks = esf_ndranks = esf_nleaves = 0;
  for (i=0; i<nranks; i++) {
    connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */
    for (j=roffset[i]; j<roffset[i+1]; j++) {if (leafdata[rmine[j]]) {esf_nleaves++; connected = PETSC_TRUE;}}
    if (connected) {esf_nranks++; if (i < ndranks) esf_ndranks++;}
  }

  /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
  ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr);
  ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr);
  ierr = PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,esf_nleaves,&esf_rmine,esf_nleaves,&esf_rremote);CHKERRQ(ierr);
  p = 0; /* Counter for connected root ranks */
  q = 0; /* Counter for connected leaves */
  esf_roffset[0] = 0;
  for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */
    connected = PETSC_FALSE;
    for (j=roffset[i]; j<roffset[i+1]; j++) {
      if (leafdata[rmine[j]]) {
        esf_rmine[q]         = new_ilocal[q] = rmine[j];
        esf_rremote[q]       = rremote[j];
        new_iremote[q].index = rremote[j];
        new_iremote[q].rank  = ranks[i];
        connected            = PETSC_TRUE;
        q++;
      }
    }
    if (connected) {
      esf_ranks[p]     = ranks[i];
      esf_roffset[p+1] = q;
      p++;
    }
  }

  /* SetGraph internally resets the SF, so we only set its fields after the call */
  ierr = PetscSFSetGraph(esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  esf->nranks    = esf_nranks;
  esf->ndranks   = esf_ndranks;
  esf->ranks     = esf_ranks;
  esf->roffset   = esf_roffset;
  esf->rmine     = esf_rmine;
  esf->rremote   = esf_rremote;
  esf->nleafreqs = esf_nranks - esf_ndranks;

  /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
  bas  = (PetscSF_Basic*)esf->data;
  ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr); /* Get recv info */
  /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we
     we do not care since these arrays are usually short. The benefit is we can fill these arrays by just parsing irootloc once.
   */
  ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr);
  ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr);

  bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
  p = 0; /* Counter for connected leaf ranks */
  q = 0; /* Counter for connected roots */
  for (i=0; i<niranks; i++) {
    connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
    for (j=ioffset[i]; j<ioffset[i+1]; j++) {
      if (rootdata[irootloc[j]]) {
        bas->irootloc[q++] = irootloc[j];
        connected = PETSC_TRUE;
      }
    }
    if (connected) {
      bas->niranks++;
      if (i<ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */
      bas->iranks[p]    = iranks[i];
      bas->ioffset[p+1] = q;
      p++;
    }
  }
  bas->itotal    = q;
  bas->nrootreqs = bas->niranks - bas->ndiranks;
  esf->persistent = PETSC_TRUE;
  /* Setup packing related fields */
  ierr = PetscSFSetUpPackFields(esf);CHKERRQ(ierr);

  /* Copy from PetscSFSetUp(), since this method wants to skip PetscSFSetUp(). */
#if defined(PETSC_HAVE_CUDA)
  if (esf->backend == PETSCSF_BACKEND_CUDA) {
    esf->ops->Malloc = PetscSFMalloc_CUDA;
    esf->ops->Free   = PetscSFFree_CUDA;
  }
#endif

#if defined(PETSC_HAVE_HIP)
  /* TODO: Needs debugging */
  if (esf->backend == PETSCSF_BACKEND_HIP) {
    esf->ops->Malloc = PetscSFMalloc_HIP;
    esf->ops->Free   = PetscSFFree_HIP;
  }
#endif

#if defined(PETSC_HAVE_KOKKOS)
  if (esf->backend == PETSCSF_BACKEND_KOKKOS) {
    esf->ops->Malloc = PetscSFMalloc_Kokkos;
    esf->ops->Free   = PetscSFFree_Kokkos;
  }
#endif
  esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
  ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr);
  *newsf = esf;
  PetscFunctionReturn(0);
}

/* Constructor for the basic SF type: install the function table and allocate the
   type-specific data (zero-initialized by PetscNewLog). */
PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
{
  PetscSF_Basic  *dat;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  sf->ops->SetUp                = PetscSFSetUp_Basic;
  sf->ops->Reset                = PetscSFReset_Basic;
  sf->ops->Destroy              = PetscSFDestroy_Basic;
  sf->ops->View                 = PetscSFView_Basic;
  sf->ops->BcastBegin           = PetscSFBcastBegin_Basic;
  sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
  sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
  sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
  sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
  sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
  sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
  sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;

  ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr);
  sf->data = (void*)dat;
  PetscFunctionReturn(0);
}