1 #include "petscsf.h" 2 #include <../src/vec/is/sf/impls/basic/sfbasic.h> 3 #include <../src/vec/is/sf/impls/basic/sfpack.h> 4 5 /*===================================================================================*/ 6 /* SF public interface implementations */ 7 /*===================================================================================*/ 8 PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf) 9 { 10 PetscErrorCode ierr; 11 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 12 PetscInt *rlengths,*ilengths,i; 13 PetscMPIInt rank,niranks,*iranks,tag; 14 MPI_Comm comm; 15 MPI_Group group; 16 MPI_Request *rootreqs,*leafreqs; 17 18 PetscFunctionBegin; 19 ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRMPI(ierr); 20 ierr = PetscSFSetUpRanks(sf,group);CHKERRQ(ierr); 21 ierr = MPI_Group_free(&group);CHKERRMPI(ierr); 22 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 23 ierr = PetscObjectGetNewTag((PetscObject)sf,&tag);CHKERRQ(ierr); 24 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 25 /* 26 * Inform roots about how many leaves and from which ranks 27 */ 28 ierr = PetscMalloc1(sf->nranks,&rlengths);CHKERRQ(ierr); 29 /* Determine number, sending ranks and length of incoming */ 30 for (i=0; i<sf->nranks; i++) { 31 rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */ 32 } 33 ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);CHKERRQ(ierr); 34 35 /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why. 36 We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is 37 small and the sorting is cheap. 38 */ 39 ierr = PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);CHKERRQ(ierr); 40 41 /* Partition into distinguished and non-distinguished incoming ranks */ 42 bas->ndiranks = sf->ndranks; 43 bas->niranks = bas->ndiranks + niranks; 44 ierr = PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);CHKERRQ(ierr); 45 bas->ioffset[0] = 0; 46 for (i=0; i<bas->ndiranks; i++) { 47 bas->iranks[i] = sf->ranks[i]; 48 bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i]; 49 } 50 if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks"); 51 for (; i<bas->niranks; i++) { 52 bas->iranks[i] = iranks[i-bas->ndiranks]; 53 bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks]; 54 } 55 bas->itotal = bas->ioffset[i]; 56 ierr = PetscFree(rlengths);CHKERRQ(ierr); 57 ierr = PetscFree(iranks);CHKERRQ(ierr); 58 ierr = PetscFree(ilengths);CHKERRQ(ierr); 59 60 /* Send leaf identities to roots */ 61 ierr = PetscMalloc1(bas->itotal,&bas->irootloc);CHKERRQ(ierr); 62 ierr = PetscMalloc2(bas->niranks-bas->ndiranks,&rootreqs,sf->nranks-sf->ndranks,&leafreqs);CHKERRQ(ierr); 63 for (i=bas->ndiranks; i<bas->niranks; i++) { 64 ierr = MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);CHKERRMPI(ierr); 65 } 66 for (i=0; i<sf->nranks; i++) { 67 PetscMPIInt npoints; 68 ierr = PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);CHKERRQ(ierr); 69 if (i < sf->ndranks) { 70 if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank"); 71 if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank"); 72 if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths"); 73 ierr = PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);CHKERRQ(ierr); 74 continue; 75 } 76 ierr = MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);CHKERRMPI(ierr); 77 } 78 ierr = MPI_Waitall(bas->niranks-bas->ndiranks,rootreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 79 ierr = MPI_Waitall(sf->nranks-sf->ndranks,leafreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 80 ierr = PetscFree2(rootreqs,leafreqs);CHKERRQ(ierr); 81 82 sf->nleafreqs = sf->nranks - sf->ndranks; 83 bas->nrootreqs = bas->niranks - bas->ndiranks; 84 sf->persistent = PETSC_TRUE; 85 86 /* Setup fields related to packing */ 87 ierr = PetscSFSetUpPackFields(sf);CHKERRQ(ierr); 88 PetscFunctionReturn(0); 89 } 90 91 PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf) 92 { 93 PetscErrorCode ierr; 94 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 95 96 PetscFunctionBegin; 97 if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed"); 98 ierr = PetscFree2(bas->iranks,bas->ioffset);CHKERRQ(ierr); 99 ierr = PetscFree(bas->irootloc);CHKERRQ(ierr); 100 #if defined(PETSC_HAVE_DEVICE) 101 for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,bas->irootloc_d[i]);CHKERRQ(ierr);} 102 #endif 103 ierr = PetscSFLinkDestroy(sf,&bas->avail);CHKERRQ(ierr); 104 ierr = PetscSFResetPackFields(sf);CHKERRQ(ierr); 105 PetscFunctionReturn(0); 106 } 107 108 PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf) 109 { 110 PetscErrorCode ierr; 111 112 PetscFunctionBegin; 113 ierr = PetscSFReset_Basic(sf);CHKERRQ(ierr); 114 ierr = PetscFree(sf->data);CHKERRQ(ierr); 115 PetscFunctionReturn(0); 116 } 117 118 #if defined(PETSC_USE_SINGLE_LIBRARY) 119 #include <petscmat.h> 120 121 PETSC_INTERN PetscErrorCode PetscSFView_Basic_PatternAndSizes(PetscSF sf,PetscViewer viewer) 122 { 123 PetscErrorCode ierr; 124 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 125 PetscSFLink link = bas->avail; 126 PetscInt i,nrootranks,ndrootranks,myrank; 127 const PetscInt *rootoffset; 128 PetscMPIInt rank,size; 129 MPI_Comm comm = PetscObjectComm((PetscObject)sf); 130 Mat A; 131 132 PetscFunctionBegin; 133 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 134 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 135 myrank = rank; 136 if (sf->persistent) { 137 /* amount of data I send to other ranks - global to local */ 138 ierr = MatCreateAIJ(comm,1,1,size,size,20,NULL,20,NULL,&A);CHKERRQ(ierr); 139 ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr); 140 for (i=0; i<nrootranks; i++) { 141 ierr = MatSetValue(A,myrank,bas->iranks[i],(rootoffset[i+1] - rootoffset[i])*link->unitbytes,INSERT_VALUES);CHKERRQ(ierr); 142 } 143 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 144 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 145 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 146 ierr = MatView(A,viewer);CHKERRQ(ierr); 147 ierr = MatDestroy(&A);CHKERRQ(ierr); 148 } 149 PetscFunctionReturn(0); 150 } 151 #endif 152 153 PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer) 154 { 155 PetscErrorCode ierr; 156 PetscBool iascii; 157 158 PetscFunctionBegin; 159 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 160 if (iascii) {ierr = PetscViewerASCIIPrintf(viewer," MultiSF sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);} 161 #if defined(PETSC_USE_SINGLE_LIBRARY) 162 { 163 PetscBool ibinary; 164 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&ibinary);CHKERRQ(ierr); 165 if (ibinary) {ierr = PetscSFView_Basic_PatternAndSizes(sf,viewer);CHKERRQ(ierr);} 166 } 167 #endif 168 PetscFunctionReturn(0); 169 } 170 171 static PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 172 { 173 PetscErrorCode ierr; 174 PetscSFLink link = NULL; 175 MPI_Request *rootreqs = NULL,*leafreqs = NULL; 176 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 177 178 PetscFunctionBegin; 179 /* Create a communication link, which provides buffers & MPI requests etc */ 180 ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link);CHKERRQ(ierr); 181 /* Get MPI requests from the link. We do not need buffers explicitly since we use persistent MPI */ 182 ierr = PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,NULL,NULL,&rootreqs,&leafreqs);CHKERRQ(ierr); 183 /* Post Irecv for remote */ 184 ierr = MPI_Startall_irecv(sf->leafbuflen[PETSCSF_REMOTE],unit,sf->nleafreqs,leafreqs);CHKERRMPI(ierr); 185 /* Pack rootdata and do Isend for remote */ 186 ierr = PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata);CHKERRQ(ierr); 187 ierr = MPI_Startall_isend(bas->rootbuflen[PETSCSF_REMOTE],unit,bas->nrootreqs,rootreqs);CHKERRMPI(ierr); 188 /* Do local BcastAndOp, which overlaps with the irecv/isend above */ 189 ierr = PetscSFLinkBcastAndOpLocal(sf,link,rootdata,leafdata,op);CHKERRQ(ierr); 190 PetscFunctionReturn(0); 191 } 192 193 PETSC_INTERN PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 194 { 195 PetscErrorCode ierr; 196 PetscSFLink link = NULL; 197 198 PetscFunctionBegin; 199 /* Retrieve the link used in XxxBegin() with root/leafdata as key */ 200 ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr); 201 /* Wait for the completion of mpi */ 202 ierr = PetscSFLinkMPIWaitall(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr); 203 /* Unpack leafdata and reclaim the link */ 204 ierr = PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafdata,op);CHKERRQ(ierr); 205 ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr); 206 PetscFunctionReturn(0); 207 } 208 209 /* Shared by ReduceBegin and FetchAndOpBegin */ 210 PETSC_STATIC_INLINE PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op,PetscSFOperation sfop,PetscSFLink *out) 211 { 212 PetscErrorCode ierr; 213 PetscSFLink link; 214 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 215 MPI_Request *rootreqs = NULL,*leafreqs = NULL; 216 217 PetscFunctionBegin; 218 ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,sfop,&link);CHKERRQ(ierr); 219 ierr = PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_LEAF2ROOT,NULL,NULL,&rootreqs,&leafreqs);CHKERRQ(ierr); 220 ierr = MPI_Startall_irecv(bas->rootbuflen[PETSCSF_REMOTE],unit,bas->nrootreqs,rootreqs);CHKERRMPI(ierr); 221 ierr = PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata);CHKERRQ(ierr); 222 ierr = MPI_Startall_isend(sf->leafbuflen[PETSCSF_REMOTE],unit,sf->nleafreqs,leafreqs);CHKERRMPI(ierr); 223 *out = link; 224 PetscFunctionReturn(0); 225 } 226 227 /* leaf -> root with reduction */ 228 static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 229 { 230 PetscErrorCode ierr; 231 PetscSFLink link = NULL; 232 233 PetscFunctionBegin; 234 ierr = PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_REDUCE,&link);CHKERRQ(ierr); 235 ierr = PetscSFLinkReduceLocal(sf,link,leafdata,rootdata,op);CHKERRQ(ierr); 236 PetscFunctionReturn(0); 237 } 238 239 PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 240 { 241 PetscErrorCode ierr; 242 PetscSFLink link = NULL; 243 244 PetscFunctionBegin; 245 ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr); 246 ierr = PetscSFLinkMPIWaitall(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr); 247 ierr = PetscSFLinkUnpackRootData(sf,link,PETSCSF_REMOTE,rootdata,op);CHKERRQ(ierr); 248 ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr); 249 PetscFunctionReturn(0); 250 } 251 252 PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op) 253 { 254 PetscErrorCode ierr; 255 PetscSFLink link = NULL; 256 257 PetscFunctionBegin; 258 ierr = PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_FETCH,&link);CHKERRQ(ierr); 259 ierr = PetscSFLinkFetchAndOpLocal(sf,link,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr); 260 PetscFunctionReturn(0); 261 } 262 263 static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 264 { 265 PetscErrorCode ierr; 266 PetscSFLink link = NULL; 267 MPI_Request *rootreqs = NULL,*leafreqs = NULL; 268 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 269 270 PetscFunctionBegin; 271 ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr); 272 /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */ 273 ierr = PetscSFLinkMPIWaitall(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr); 274 /* Do fetch-and-op, the (remote) update results are in rootbuf */ 275 ierr = PetscSFLinkFetchRootData(sf,link,PETSCSF_REMOTE,rootdata,op);CHKERRQ(ierr); 276 277 /* Bcast rootbuf to leafupdate */ 278 ierr = PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,NULL,NULL,&rootreqs,&leafreqs);CHKERRQ(ierr); 279 /* Post leaf receives and root sends */ 280 ierr = MPI_Startall_irecv(sf->leafbuflen[PETSCSF_REMOTE],unit,sf->nleafreqs,leafreqs);CHKERRMPI(ierr); 281 ierr = MPI_Startall_isend(bas->rootbuflen[PETSCSF_REMOTE],unit,bas->nrootreqs,rootreqs);CHKERRMPI(ierr); 282 /* Unpack and insert fetched data into leaves */ 283 ierr = PetscSFLinkMPIWaitall(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr); 284 ierr = PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafupdate,MPI_REPLACE);CHKERRQ(ierr); 285 ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr); 286 PetscFunctionReturn(0); 287 } 288 289 PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 290 { 291 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 292 293 PetscFunctionBegin; 294 if (niranks) *niranks = bas->niranks; 295 if (iranks) *iranks = bas->iranks; 296 if (ioffset) *ioffset = bas->ioffset; 297 if (irootloc) *irootloc = bas->irootloc; 298 PetscFunctionReturn(0); 299 } 300 301 /* An optimized PetscSFCreateEmbeddedSF. We aggresively make use of the established communication on sf. 302 We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[] 303 was sorted before calling the routine. 304 */ 305 PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 306 { 307 PetscSF esf; 308 PetscInt esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote; 309 PetscInt i,j,p,q,nroots,esf_nleaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal; 310 char *rootdata,*leafdata,*leafmem; /* Only stores 0 or 1, so we can save memory with char */ 311 PetscMPIInt *esf_ranks; 312 const PetscMPIInt *ranks,*iranks; 313 const PetscInt *roffset,*rmine,*rremote,*ioffset,*irootloc; 314 PetscBool connected; 315 PetscSFNode *new_iremote; 316 PetscSF_Basic *bas; 317 PetscErrorCode ierr; 318 319 PetscFunctionBegin; 320 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr); 321 ierr = PetscSFSetFromOptions(esf);CHKERRQ(ierr); 322 ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */ 323 324 /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */ 325 ierr = PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);CHKERRQ(ierr); 326 ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 327 maxlocal = maxleaf - minleaf + 1; 328 ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr); 329 leafdata = leafmem - minleaf; 330 /* Tag selected roots */ 331 for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1; 332 333 ierr = PetscSFBcastBegin(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 334 ierr = PetscSFBcastEnd(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 335 ierr = PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr); /* Get send info */ 336 esf_nranks = esf_ndranks = esf_nleaves = 0; 337 for (i=0; i<nranks; i++) { 338 connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */ 339 for (j=roffset[i]; j<roffset[i+1]; j++) {if (leafdata[rmine[j]]) {esf_nleaves++; connected = PETSC_TRUE;}} 340 if (connected) {esf_nranks++; if (i < ndranks) esf_ndranks++;} 341 } 342 343 /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */ 344 ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr); 345 ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr); 346 ierr = PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,esf_nleaves,&esf_rmine,esf_nleaves,&esf_rremote);CHKERRQ(ierr); 347 p = 0; /* Counter for connected root ranks */ 348 q = 0; /* Counter for connected leaves */ 349 esf_roffset[0] = 0; 350 for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */ 351 connected = PETSC_FALSE; 352 for (j=roffset[i]; j<roffset[i+1]; j++) { 353 if (leafdata[rmine[j]]) { 354 esf_rmine[q] = new_ilocal[q] = rmine[j]; 355 esf_rremote[q] = rremote[j]; 356 new_iremote[q].index = rremote[j]; 357 new_iremote[q].rank = ranks[i]; 358 connected = PETSC_TRUE; 359 q++; 360 } 361 } 362 if (connected) { 363 esf_ranks[p] = ranks[i]; 364 esf_roffset[p+1] = q; 365 p++; 366 } 367 } 368 369 /* SetGraph internally resets the SF, so we only set its fields after the call */ 370 ierr = PetscSFSetGraph(esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 371 esf->nranks = esf_nranks; 372 esf->ndranks = esf_ndranks; 373 esf->ranks = esf_ranks; 374 esf->roffset = esf_roffset; 375 esf->rmine = esf_rmine; 376 esf->rremote = esf_rremote; 377 esf->nleafreqs = esf_nranks - esf_ndranks; 378 379 /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */ 380 bas = (PetscSF_Basic*)esf->data; 381 ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr); /* Get recv info */ 382 /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we 383 we do not care since these arrays are usually short. The benefit is we can fill these arrays by just parsing irootloc once. 384 */ 385 ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr); 386 ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr); 387 bas->niranks = bas->ndiranks = bas->ioffset[0] = 0; 388 p = 0; /* Counter for connected leaf ranks */ 389 q = 0; /* Counter for connected roots */ 390 for (i=0; i<niranks; i++) { 391 connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */ 392 for (j=ioffset[i]; j<ioffset[i+1]; j++) { 393 if (rootdata[irootloc[j]]) { 394 bas->irootloc[q++] = irootloc[j]; 395 connected = PETSC_TRUE; 396 } 397 } 398 if (connected) { 399 bas->niranks++; 400 if (i<ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */ 401 bas->iranks[p] = iranks[i]; 402 bas->ioffset[p+1] = q; 403 p++; 404 } 405 } 406 bas->itotal = q; 407 bas->nrootreqs = bas->niranks - bas->ndiranks; 408 esf->persistent = PETSC_TRUE; 409 /* Setup packing related fields */ 410 ierr = PetscSFSetUpPackFields(esf);CHKERRQ(ierr); 411 412 /* Copy from PetscSFSetUp(), since this method wants to skip PetscSFSetUp(). */ 413 #if defined(PETSC_HAVE_CUDA) 414 if (esf->backend == PETSCSF_BACKEND_CUDA) { 415 esf->ops->Malloc = PetscSFMalloc_Cuda; 416 esf->ops->Free = PetscSFFree_Cuda; 417 } 418 #endif 419 420 #if defined(PETSC_HAVE_HIP) 421 /* TODO: Needs debugging */ 422 if (esf->backend == PETSCSF_BACKEND_HIP) { 423 esf->ops->Malloc = PetscSFMalloc_HIP; 424 esf->ops->Free = PetscSFFree_HIP; 425 } 426 #endif 427 428 #if defined(PETSC_HAVE_KOKKOS) 429 if (esf->backend == PETSCSF_BACKEND_KOKKOS) { 430 esf->ops->Malloc = PetscSFMalloc_Kokkos; 431 esf->ops->Free = PetscSFFree_Kokkos; 432 } 433 #endif 434 esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */ 435 ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr); 436 *newsf = esf; 437 PetscFunctionReturn(0); 438 } 439 440 PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf) 441 { 442 PetscSF_Basic *dat; 443 PetscErrorCode ierr; 444 445 PetscFunctionBegin; 446 sf->ops->SetUp = PetscSFSetUp_Basic; 447 sf->ops->Reset = PetscSFReset_Basic; 448 sf->ops->Destroy = PetscSFDestroy_Basic; 449 sf->ops->View = PetscSFView_Basic; 450 sf->ops->BcastBegin = PetscSFBcastBegin_Basic; 451 sf->ops->BcastEnd = PetscSFBcastEnd_Basic; 452 sf->ops->ReduceBegin = PetscSFReduceBegin_Basic; 453 sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 454 sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Basic; 455 sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Basic; 456 sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Basic; 457 sf->ops->CreateEmbeddedSF = PetscSFCreateEmbeddedSF_Basic; 458 459 ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr); 460 sf->data = (void*)dat; 461 PetscFunctionReturn(0); 462 } 463