1 #include <../src/vec/is/sf/impls/basic/sfbasic.h> 2 #include <../src/vec/is/sf/impls/basic/sfpack.h> 3 4 /*===================================================================================*/ 5 /* SF public interface implementations */ 6 /*===================================================================================*/ 7 PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf) 8 { 9 PetscErrorCode ierr; 10 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 11 PetscInt *rlengths,*ilengths,i,nRemoteRootRanks,nRemoteLeafRanks; 12 PetscMPIInt rank,niranks,*iranks,tag; 13 MPI_Comm comm; 14 MPI_Group group; 15 MPI_Request *rootreqs,*leafreqs; 16 17 PetscFunctionBegin; 18 ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRMPI(ierr); 19 ierr = PetscSFSetUpRanks(sf,group);CHKERRQ(ierr); 20 ierr = MPI_Group_free(&group);CHKERRMPI(ierr); 21 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 22 ierr = PetscObjectGetNewTag((PetscObject)sf,&tag);CHKERRQ(ierr); 23 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 24 /* 25 * Inform roots about how many leaves and from which ranks 26 */ 27 ierr = PetscMalloc1(sf->nranks,&rlengths);CHKERRQ(ierr); 28 /* Determine number, sending ranks and length of incoming */ 29 for (i=0; i<sf->nranks; i++) { 30 rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */ 31 } 32 nRemoteRootRanks = sf->nranks-sf->ndranks; 33 ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,nRemoteRootRanks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);CHKERRQ(ierr); 34 35 /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why. 36 We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is 37 small and the sorting is cheap. 38 */ 39 ierr = PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);CHKERRQ(ierr); 40 41 /* Partition into distinguished and non-distinguished incoming ranks */ 42 bas->ndiranks = sf->ndranks; 43 bas->niranks = bas->ndiranks + niranks; 44 ierr = PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);CHKERRQ(ierr); 45 bas->ioffset[0] = 0; 46 for (i=0; i<bas->ndiranks; i++) { 47 bas->iranks[i] = sf->ranks[i]; 48 bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i]; 49 } 50 if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks"); 51 for (; i<bas->niranks; i++) { 52 bas->iranks[i] = iranks[i-bas->ndiranks]; 53 bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks]; 54 } 55 bas->itotal = bas->ioffset[i]; 56 ierr = PetscFree(rlengths);CHKERRQ(ierr); 57 ierr = PetscFree(iranks);CHKERRQ(ierr); 58 ierr = PetscFree(ilengths);CHKERRQ(ierr); 59 60 /* Send leaf identities to roots */ 61 nRemoteLeafRanks = bas->niranks-bas->ndiranks; 62 ierr = PetscMalloc1(bas->itotal,&bas->irootloc);CHKERRQ(ierr); 63 ierr = PetscMalloc2(nRemoteLeafRanks,&rootreqs,nRemoteRootRanks,&leafreqs);CHKERRQ(ierr); 64 for (i=bas->ndiranks; i<bas->niranks; i++) { 65 ierr = MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);CHKERRMPI(ierr); 66 } 67 for (i=0; i<sf->nranks; i++) { 68 PetscMPIInt npoints; 69 ierr = PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);CHKERRQ(ierr); 70 if (i < sf->ndranks) { 71 if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank"); 72 if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank"); 73 if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths"); 74 ierr = PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);CHKERRQ(ierr); 75 continue; 76 } 77 ierr = MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);CHKERRMPI(ierr); 78 } 79 ierr = MPI_Waitall(nRemoteLeafRanks,rootreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 80 ierr = MPI_Waitall(nRemoteRootRanks,leafreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 81 82 sf->nleafreqs = nRemoteRootRanks; 83 bas->nrootreqs = nRemoteLeafRanks; 84 sf->persistent = PETSC_TRUE; 85 86 /* Setup fields related to packing, such as rootbuflen[] */ 87 ierr = PetscSFSetUpPackFields(sf);CHKERRQ(ierr); 88 ierr = PetscFree2(rootreqs,leafreqs);CHKERRQ(ierr); 89 PetscFunctionReturn(0); 90 } 91 92 PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf) 93 { 94 PetscErrorCode ierr; 95 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 96 PetscSFLink link = bas->avail,next; 97 98 PetscFunctionBegin; 99 if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed"); 100 ierr = PetscFree2(bas->iranks,bas->ioffset);CHKERRQ(ierr); 101 ierr = PetscFree(bas->irootloc);CHKERRQ(ierr); 102 103 #if defined(PETSC_HAVE_DEVICE) 104 for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,bas->irootloc_d[i]);CHKERRQ(ierr);} 105 #endif 106 107 #if defined(PETSC_HAVE_NVSHMEM) 108 ierr = PetscSFReset_Basic_NVSHMEM(sf);CHKERRQ(ierr); 109 #endif 110 111 for (; link; link=next) {next = link->next; ierr = PetscSFLinkDestroy(sf,link);CHKERRQ(ierr);} 112 bas->avail = NULL; 113 ierr = PetscSFResetPackFields(sf);CHKERRQ(ierr); 114 PetscFunctionReturn(0); 115 } 116 117 PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf) 118 { 119 PetscErrorCode ierr; 120 121 PetscFunctionBegin; 122 ierr = PetscSFReset_Basic(sf);CHKERRQ(ierr); 123 ierr = PetscFree(sf->data);CHKERRQ(ierr); 124 PetscFunctionReturn(0); 125 } 126 127 #if defined(PETSC_USE_SINGLE_LIBRARY) 128 #include <petscmat.h> 129 130 PETSC_INTERN PetscErrorCode PetscSFView_Basic_PatternAndSizes(PetscSF sf,PetscViewer viewer) 131 { 132 PetscErrorCode ierr; 133 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 134 PetscSFLink link = bas->avail; 135 PetscInt i,nrootranks,ndrootranks,myrank; 136 const PetscInt *rootoffset; 137 PetscMPIInt rank,size; 138 MPI_Comm comm = PetscObjectComm((PetscObject)sf); 139 Mat A; 140 141 PetscFunctionBegin; 142 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 143 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 144 myrank = rank; 145 if (sf->persistent) { 146 /* amount of data I send to other ranks - global to local */ 147 ierr = MatCreateAIJ(comm,1,1,size,size,20,NULL,20,NULL,&A);CHKERRQ(ierr); 148 ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr); 149 for (i=0; i<nrootranks; i++) { 150 ierr = MatSetValue(A,myrank,bas->iranks[i],(rootoffset[i+1] - rootoffset[i])*link->unitbytes,INSERT_VALUES);CHKERRQ(ierr); 151 } 152 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 153 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 154 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 155 ierr = MatView(A,viewer);CHKERRQ(ierr); 156 ierr = MatDestroy(&A);CHKERRQ(ierr); 157 } 158 PetscFunctionReturn(0); 159 } 160 #endif 161 162 PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer) 163 { 164 PetscErrorCode ierr; 165 PetscBool iascii; 166 167 PetscFunctionBegin; 168 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 169 if (iascii) {ierr = PetscViewerASCIIPrintf(viewer," MultiSF sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);} 170 #if defined(PETSC_USE_SINGLE_LIBRARY) 171 { 172 PetscBool ibinary; 173 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&ibinary);CHKERRQ(ierr); 174 if (ibinary) {ierr = PetscSFView_Basic_PatternAndSizes(sf,viewer);CHKERRQ(ierr);} 175 } 176 #endif 177 PetscFunctionReturn(0); 178 } 179 180 static PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 181 { 182 PetscErrorCode ierr; 183 PetscSFLink link = NULL; 184 185 PetscFunctionBegin; 186 /* Create a communication link, which provides buffers, MPI requests etc (if MPI is used) */ 187 ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link);CHKERRQ(ierr); 188 /* Pack rootdata to rootbuf for remote communication */ 189 ierr = PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata);CHKERRQ(ierr); 190 /* Start communcation, e.g., post MPI_Isend */ 191 ierr = PetscSFLinkStartCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr); 192 /* Do local scatter (i.e., self to self communication), which overlaps with the remote communication above */ 193 ierr = PetscSFLinkScatterLocal(sf,link,PETSCSF_ROOT2LEAF,(void*)rootdata,leafdata,op);CHKERRQ(ierr); 194 PetscFunctionReturn(0); 195 } 196 197 PETSC_INTERN PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 198 { 199 PetscErrorCode ierr; 200 PetscSFLink link = NULL; 201 202 PetscFunctionBegin; 203 /* Retrieve the link used in XxxBegin() with root/leafdata as key */ 204 ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr); 205 /* Finish remote communication, e.g., post MPI_Waitall */ 206 ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr); 207 /* Unpack data in leafbuf to leafdata for remote communication */ 208 ierr = PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafdata,op);CHKERRQ(ierr); 209 /* Recycle the link */ 210 ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr); 211 PetscFunctionReturn(0); 212 } 213 214 /* Shared by ReduceBegin and FetchAndOpBegin */ 215 PETSC_STATIC_INLINE PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op,PetscSFOperation sfop,PetscSFLink *out) 216 { 217 PetscErrorCode ierr; 218 PetscSFLink link = NULL; 219 220 PetscFunctionBegin; 221 ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,sfop,&link);CHKERRQ(ierr); 222 ierr = PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata);CHKERRQ(ierr); 223 ierr = PetscSFLinkStartCommunication(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr); 224 *out = link; 225 PetscFunctionReturn(0); 226 } 227 228 /* leaf -> root with reduction */ 229 static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 230 { 231 PetscErrorCode ierr; 232 PetscSFLink link = NULL; 233 234 PetscFunctionBegin; 235 ierr = PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_REDUCE,&link);CHKERRQ(ierr); 236 ierr = PetscSFLinkScatterLocal(sf,link,PETSCSF_LEAF2ROOT,rootdata,(void*)leafdata,op);CHKERRQ(ierr); 237 PetscFunctionReturn(0); 238 } 239 240 PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 241 { 242 PetscErrorCode ierr; 243 PetscSFLink link = NULL; 244 245 PetscFunctionBegin; 246 ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr); 247 ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr); 248 ierr = PetscSFLinkUnpackRootData(sf,link,PETSCSF_REMOTE,rootdata,op);CHKERRQ(ierr); 249 ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr); 250 PetscFunctionReturn(0); 251 } 252 253 PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op) 254 { 255 PetscErrorCode ierr; 256 PetscSFLink link = NULL; 257 258 PetscFunctionBegin; 259 ierr = PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_FETCH,&link);CHKERRQ(ierr); 260 ierr = PetscSFLinkFetchAndOpLocal(sf,link,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr); 261 PetscFunctionReturn(0); 262 } 263 264 static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 265 { 266 PetscErrorCode ierr; 267 PetscSFLink link = NULL; 268 269 PetscFunctionBegin; 270 ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr); 271 /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */ 272 ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr); 273 /* Do fetch-and-op, the (remote) update results are in rootbuf */ 274 ierr = PetscSFLinkFetchAndOpRemote(sf,link,rootdata,op);CHKERRQ(ierr); 275 /* Bcast rootbuf to leafupdate */ 276 ierr = PetscSFLinkStartCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr); 277 ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr); 278 /* Unpack and insert fetched data into leaves */ 279 ierr = PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafupdate,MPI_REPLACE);CHKERRQ(ierr); 280 ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr); 281 PetscFunctionReturn(0); 282 } 283 284 PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 285 { 286 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 287 288 PetscFunctionBegin; 289 if (niranks) *niranks = bas->niranks; 290 if (iranks) *iranks = bas->iranks; 291 if (ioffset) *ioffset = bas->ioffset; 292 if (irootloc) *irootloc = bas->irootloc; 293 PetscFunctionReturn(0); 294 } 295 296 /* An optimized PetscSFCreateEmbeddedRootSF. We aggresively make use of the established communication on sf. 297 We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[] 298 was sorted before calling the routine. 299 */ 300 PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedRootSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 301 { 302 PetscSF esf; 303 PetscInt esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote; 304 PetscInt i,j,p,q,nroots,esf_nleaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal; 305 char *rootdata,*leafdata,*leafmem; /* Only stores 0 or 1, so we can save memory with char */ 306 PetscMPIInt *esf_ranks; 307 const PetscMPIInt *ranks,*iranks; 308 const PetscInt *roffset,*rmine,*rremote,*ioffset,*irootloc; 309 PetscBool connected; 310 PetscSFNode *new_iremote; 311 PetscSF_Basic *bas; 312 PetscErrorCode ierr; 313 314 PetscFunctionBegin; 315 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr); 316 ierr = PetscSFSetFromOptions(esf);CHKERRQ(ierr); 317 ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */ 318 319 /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */ 320 ierr = PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);CHKERRQ(ierr); 321 ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 322 maxlocal = maxleaf - minleaf + 1; 323 ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr); 324 leafdata = leafmem - minleaf; 325 /* Tag selected roots */ 326 for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1; 327 328 ierr = PetscSFBcastBegin(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 329 ierr = PetscSFBcastEnd(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 330 ierr = PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr); /* Get send info */ 331 esf_nranks = esf_ndranks = esf_nleaves = 0; 332 for (i=0; i<nranks; i++) { 333 connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */ 334 for (j=roffset[i]; j<roffset[i+1]; j++) {if (leafdata[rmine[j]]) {esf_nleaves++; connected = PETSC_TRUE;}} 335 if (connected) {esf_nranks++; if (i < ndranks) esf_ndranks++;} 336 } 337 338 /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */ 339 ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr); 340 ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr); 341 ierr = PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,esf_nleaves,&esf_rmine,esf_nleaves,&esf_rremote);CHKERRQ(ierr); 342 p = 0; /* Counter for connected root ranks */ 343 q = 0; /* Counter for connected leaves */ 344 esf_roffset[0] = 0; 345 for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */ 346 connected = PETSC_FALSE; 347 for (j=roffset[i]; j<roffset[i+1]; j++) { 348 if (leafdata[rmine[j]]) { 349 esf_rmine[q] = new_ilocal[q] = rmine[j]; 350 esf_rremote[q] = rremote[j]; 351 new_iremote[q].index = rremote[j]; 352 new_iremote[q].rank = ranks[i]; 353 connected = PETSC_TRUE; 354 q++; 355 } 356 } 357 if (connected) { 358 esf_ranks[p] = ranks[i]; 359 esf_roffset[p+1] = q; 360 p++; 361 } 362 } 363 364 /* SetGraph internally resets the SF, so we only set its fields after the call */ 365 ierr = PetscSFSetGraph(esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 366 esf->nranks = esf_nranks; 367 esf->ndranks = esf_ndranks; 368 esf->ranks = esf_ranks; 369 esf->roffset = esf_roffset; 370 esf->rmine = esf_rmine; 371 esf->rremote = esf_rremote; 372 esf->nleafreqs = esf_nranks - esf_ndranks; 373 374 /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */ 375 bas = (PetscSF_Basic*)esf->data; 376 ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr); /* Get recv info */ 377 /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we 378 we do not care since these arrays are usually short. The benefit is we can fill these arrays by just parsing irootloc once. 379 */ 380 ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr); 381 ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr); 382 bas->niranks = bas->ndiranks = bas->ioffset[0] = 0; 383 p = 0; /* Counter for connected leaf ranks */ 384 q = 0; /* Counter for connected roots */ 385 for (i=0; i<niranks; i++) { 386 connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */ 387 for (j=ioffset[i]; j<ioffset[i+1]; j++) { 388 if (rootdata[irootloc[j]]) { 389 bas->irootloc[q++] = irootloc[j]; 390 connected = PETSC_TRUE; 391 } 392 } 393 if (connected) { 394 bas->niranks++; 395 if (i<ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */ 396 bas->iranks[p] = iranks[i]; 397 bas->ioffset[p+1] = q; 398 p++; 399 } 400 } 401 bas->itotal = q; 402 bas->nrootreqs = bas->niranks - bas->ndiranks; 403 esf->persistent = PETSC_TRUE; 404 /* Setup packing related fields */ 405 ierr = PetscSFSetUpPackFields(esf);CHKERRQ(ierr); 406 407 /* Copy from PetscSFSetUp(), since this method wants to skip PetscSFSetUp(). */ 408 #if defined(PETSC_HAVE_CUDA) 409 if (esf->backend == PETSCSF_BACKEND_CUDA) { 410 esf->ops->Malloc = PetscSFMalloc_CUDA; 411 esf->ops->Free = PetscSFFree_CUDA; 412 } 413 #endif 414 415 #if defined(PETSC_HAVE_HIP) 416 /* TODO: Needs debugging */ 417 if (esf->backend == PETSCSF_BACKEND_HIP) { 418 esf->ops->Malloc = PetscSFMalloc_HIP; 419 esf->ops->Free = PetscSFFree_HIP; 420 } 421 #endif 422 423 #if defined(PETSC_HAVE_KOKKOS) 424 if (esf->backend == PETSCSF_BACKEND_KOKKOS) { 425 esf->ops->Malloc = PetscSFMalloc_Kokkos; 426 esf->ops->Free = PetscSFFree_Kokkos; 427 } 428 #endif 429 esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */ 430 ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr); 431 *newsf = esf; 432 PetscFunctionReturn(0); 433 } 434 435 PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf) 436 { 437 PetscSF_Basic *dat; 438 PetscErrorCode ierr; 439 440 PetscFunctionBegin; 441 sf->ops->SetUp = PetscSFSetUp_Basic; 442 sf->ops->Reset = PetscSFReset_Basic; 443 sf->ops->Destroy = PetscSFDestroy_Basic; 444 sf->ops->View = PetscSFView_Basic; 445 sf->ops->BcastBegin = PetscSFBcastBegin_Basic; 446 sf->ops->BcastEnd = PetscSFBcastEnd_Basic; 447 sf->ops->ReduceBegin = PetscSFReduceBegin_Basic; 448 sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 449 sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Basic; 450 sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Basic; 451 sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Basic; 452 sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic; 453 454 ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr); 455 sf->data = (void*)dat; 456 PetscFunctionReturn(0); 457 } 458