1 #include <../src/vec/is/sf/impls/basic/sfbasic.h> 2 #include <../src/vec/is/sf/impls/basic/sfpack.h> 3 #include <petsc/private/viewerimpl.h> 4 5 /*===================================================================================*/ 6 /* SF public interface implementations */ 7 /*===================================================================================*/ 8 PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf) 9 { 10 PetscErrorCode ierr; 11 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 12 PetscInt *rlengths,*ilengths,i,nRemoteRootRanks,nRemoteLeafRanks; 13 PetscMPIInt rank,niranks,*iranks,tag; 14 MPI_Comm comm; 15 MPI_Group group; 16 MPI_Request *rootreqs,*leafreqs; 17 18 PetscFunctionBegin; 19 ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRMPI(ierr); 20 ierr = PetscSFSetUpRanks(sf,group);CHKERRQ(ierr); 21 ierr = MPI_Group_free(&group);CHKERRMPI(ierr); 22 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 23 ierr = PetscObjectGetNewTag((PetscObject)sf,&tag);CHKERRQ(ierr); 24 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 25 /* 26 * Inform roots about how many leaves and from which ranks 27 */ 28 ierr = PetscMalloc1(sf->nranks,&rlengths);CHKERRQ(ierr); 29 /* Determine number, sending ranks and length of incoming */ 30 for (i=0; i<sf->nranks; i++) { 31 rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */ 32 } 33 nRemoteRootRanks = sf->nranks-sf->ndranks; 34 ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,nRemoteRootRanks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);CHKERRQ(ierr); 35 36 /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why. 37 We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is 38 small and the sorting is cheap. 39 */ 40 ierr = PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);CHKERRQ(ierr); 41 42 /* Partition into distinguished and non-distinguished incoming ranks */ 43 bas->ndiranks = sf->ndranks; 44 bas->niranks = bas->ndiranks + niranks; 45 ierr = PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);CHKERRQ(ierr); 46 bas->ioffset[0] = 0; 47 for (i=0; i<bas->ndiranks; i++) { 48 bas->iranks[i] = sf->ranks[i]; 49 bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i]; 50 } 51 if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks"); 52 for (; i<bas->niranks; i++) { 53 bas->iranks[i] = iranks[i-bas->ndiranks]; 54 bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks]; 55 } 56 bas->itotal = bas->ioffset[i]; 57 ierr = PetscFree(rlengths);CHKERRQ(ierr); 58 ierr = PetscFree(iranks);CHKERRQ(ierr); 59 ierr = PetscFree(ilengths);CHKERRQ(ierr); 60 61 /* Send leaf identities to roots */ 62 nRemoteLeafRanks = bas->niranks-bas->ndiranks; 63 ierr = PetscMalloc1(bas->itotal,&bas->irootloc);CHKERRQ(ierr); 64 ierr = PetscMalloc2(nRemoteLeafRanks,&rootreqs,nRemoteRootRanks,&leafreqs);CHKERRQ(ierr); 65 for (i=bas->ndiranks; i<bas->niranks; i++) { 66 ierr = MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);CHKERRMPI(ierr); 67 } 68 for (i=0; i<sf->nranks; i++) { 69 PetscMPIInt npoints; 70 ierr = PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);CHKERRQ(ierr); 71 if (i < sf->ndranks) { 72 if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank"); 73 if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank"); 74 if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths"); 75 ierr = PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);CHKERRQ(ierr); 76 continue; 77 } 78 ierr = MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);CHKERRMPI(ierr); 79 } 80 ierr = MPI_Waitall(nRemoteLeafRanks,rootreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 81 ierr = MPI_Waitall(nRemoteRootRanks,leafreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 82 83 sf->nleafreqs = nRemoteRootRanks; 84 bas->nrootreqs = nRemoteLeafRanks; 85 sf->persistent = PETSC_TRUE; 86 87 /* Setup fields related to packing, such as rootbuflen[] */ 88 ierr = PetscSFSetUpPackFields(sf);CHKERRQ(ierr); 89 ierr = PetscFree2(rootreqs,leafreqs);CHKERRQ(ierr); 90 PetscFunctionReturn(0); 91 } 92 93 PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf) 94 { 95 PetscErrorCode ierr; 96 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 97 PetscSFLink link = bas->avail,next; 98 99 PetscFunctionBegin; 100 if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed"); 101 ierr = PetscFree2(bas->iranks,bas->ioffset);CHKERRQ(ierr); 102 ierr = PetscFree(bas->irootloc);CHKERRQ(ierr); 103 104 #if defined(PETSC_HAVE_DEVICE) 105 for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,bas->irootloc_d[i]);CHKERRQ(ierr);} 106 #endif 107 108 #if defined(PETSC_HAVE_NVSHMEM) 109 ierr = PetscSFReset_Basic_NVSHMEM(sf);CHKERRQ(ierr); 110 #endif 111 112 for (; link; link=next) {next = link->next; ierr = PetscSFLinkDestroy(sf,link);CHKERRQ(ierr);} 113 bas->avail = NULL; 114 ierr = PetscSFResetPackFields(sf);CHKERRQ(ierr); 115 PetscFunctionReturn(0); 116 } 117 118 PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf) 119 { 120 PetscErrorCode ierr; 121 122 PetscFunctionBegin; 123 ierr = PetscSFReset_Basic(sf);CHKERRQ(ierr); 124 ierr = PetscFree(sf->data);CHKERRQ(ierr); 125 PetscFunctionReturn(0); 126 } 127 128 #if defined(PETSC_USE_SINGLE_LIBRARY) 129 #include <petscmat.h> 130 131 PETSC_INTERN PetscErrorCode PetscSFView_Basic_PatternAndSizes(PetscSF sf,PetscViewer viewer) 132 { 133 PetscErrorCode ierr; 134 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 135 PetscInt i,nrootranks,ndrootranks; 136 const PetscInt *rootoffset; 137 PetscMPIInt rank,size; 138 const PetscMPIInt *rootranks; 139 MPI_Comm comm = PetscObjectComm((PetscObject)sf); 140 PetscScalar unitbytes; 141 Mat A; 142 143 PetscFunctionBegin; 144 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 145 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 146 /* PetscSFView is most useful for the SF used in VecScatterBegin/End in MatMult etc, where we do 147 PetscSFBcast, i.e., roots send data to leaves. We dump the communication pattern into a matrix 148 in senders' view point: how many bytes I will send to my neighbors. 149 150 Looking at a column of the matrix, one can also know how many bytes the rank will receive from others. 151 152 If PetscSFLink bas->inuse is available, we can use that to get tree vertex size. But that would give 153 different interpretations for the same SF for different data types. Since we most care about VecScatter, 154 we uniformly treat each vertex as a PetscScalar. 155 */ 156 unitbytes = (PetscScalar)sizeof(PetscScalar); 157 158 ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,NULL);CHKERRQ(ierr); 159 ierr = MatCreateAIJ(comm,1,1,size,size,1,NULL,nrootranks-ndrootranks,NULL,&A);CHKERRQ(ierr); 160 ierr = MatSetOptionsPrefix(A,"__petsc_internal__");CHKERRQ(ierr); /* To prevent the internal A from taking any command line options */ 161 for (i=0; i<nrootranks; i++) { 162 ierr = MatSetValue(A,(PetscInt)rank,bas->iranks[i],(rootoffset[i+1]-rootoffset[i])*unitbytes,INSERT_VALUES);CHKERRQ(ierr); 163 } 164 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 165 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 166 ierr = MatView(A,viewer);CHKERRQ(ierr); 167 ierr = MatDestroy(&A);CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 #endif 171 172 PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer) 173 { 174 PetscErrorCode ierr; 175 PetscBool isascii; 176 177 PetscFunctionBegin; 178 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr); 179 if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {ierr = PetscViewerASCIIPrintf(viewer," MultiSF sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);} 180 #if defined(PETSC_USE_SINGLE_LIBRARY) 181 else { 182 PetscBool isdraw,isbinary; 183 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 184 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 185 if ((isascii && viewer->format == PETSC_VIEWER_ASCII_MATLAB) || isdraw || isbinary) { 186 ierr = PetscSFView_Basic_PatternAndSizes(sf,viewer);CHKERRQ(ierr); 187 } 188 } 189 #endif 190 PetscFunctionReturn(0); 191 } 192 193 static PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 194 { 195 PetscErrorCode ierr; 196 PetscSFLink link = NULL; 197 198 PetscFunctionBegin; 199 /* Create a communication link, which provides buffers, MPI requests etc (if MPI is used) */ 200 ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link);CHKERRQ(ierr); 201 /* Pack rootdata to rootbuf for remote communication */ 202 ierr = PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata);CHKERRQ(ierr); 203 /* Start communcation, e.g., post MPI_Isend */ 204 ierr = PetscSFLinkStartCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr); 205 /* Do local scatter (i.e., self to self communication), which overlaps with the remote communication above */ 206 ierr = PetscSFLinkScatterLocal(sf,link,PETSCSF_ROOT2LEAF,(void*)rootdata,leafdata,op);CHKERRQ(ierr); 207 PetscFunctionReturn(0); 208 } 209 210 PETSC_INTERN PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 211 { 212 PetscErrorCode ierr; 213 PetscSFLink link = NULL; 214 215 PetscFunctionBegin; 216 /* Retrieve the link used in XxxBegin() with root/leafdata as key */ 217 ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr); 218 /* Finish remote communication, e.g., post MPI_Waitall */ 219 ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr); 220 /* Unpack data in leafbuf to leafdata for remote communication */ 221 ierr = PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafdata,op);CHKERRQ(ierr); 222 /* Recycle the link */ 223 ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr); 224 PetscFunctionReturn(0); 225 } 226 227 /* Shared by ReduceBegin and FetchAndOpBegin */ 228 PETSC_STATIC_INLINE PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op,PetscSFOperation sfop,PetscSFLink *out) 229 { 230 PetscErrorCode ierr; 231 PetscSFLink link = NULL; 232 233 PetscFunctionBegin; 234 ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,sfop,&link);CHKERRQ(ierr); 235 ierr = PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata);CHKERRQ(ierr); 236 ierr = PetscSFLinkStartCommunication(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr); 237 *out = link; 238 PetscFunctionReturn(0); 239 } 240 241 /* leaf -> root with reduction */ 242 static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 243 { 244 PetscErrorCode ierr; 245 PetscSFLink link = NULL; 246 247 PetscFunctionBegin; 248 ierr = PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_REDUCE,&link);CHKERRQ(ierr); 249 ierr = PetscSFLinkScatterLocal(sf,link,PETSCSF_LEAF2ROOT,rootdata,(void*)leafdata,op);CHKERRQ(ierr); 250 PetscFunctionReturn(0); 251 } 252 253 PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 254 { 255 PetscErrorCode ierr; 256 PetscSFLink link = NULL; 257 258 PetscFunctionBegin; 259 ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr); 260 ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr); 261 ierr = PetscSFLinkUnpackRootData(sf,link,PETSCSF_REMOTE,rootdata,op);CHKERRQ(ierr); 262 ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr); 263 PetscFunctionReturn(0); 264 } 265 266 PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op) 267 { 268 PetscErrorCode ierr; 269 PetscSFLink link = NULL; 270 271 PetscFunctionBegin; 272 ierr = PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_FETCH,&link);CHKERRQ(ierr); 273 ierr = PetscSFLinkFetchAndOpLocal(sf,link,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr); 274 PetscFunctionReturn(0); 275 } 276 277 static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 278 { 279 PetscErrorCode ierr; 280 PetscSFLink link = NULL; 281 282 PetscFunctionBegin; 283 ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr); 284 /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */ 285 ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr); 286 /* Do fetch-and-op, the (remote) update results are in rootbuf */ 287 ierr = PetscSFLinkFetchAndOpRemote(sf,link,rootdata,op);CHKERRQ(ierr); 288 /* Bcast rootbuf to leafupdate */ 289 ierr = PetscSFLinkStartCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr); 290 ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr); 291 /* Unpack and insert fetched data into leaves */ 292 ierr = PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafupdate,MPI_REPLACE);CHKERRQ(ierr); 293 ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr); 294 PetscFunctionReturn(0); 295 } 296 297 PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 298 { 299 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 300 301 PetscFunctionBegin; 302 if (niranks) *niranks = bas->niranks; 303 if (iranks) *iranks = bas->iranks; 304 if (ioffset) *ioffset = bas->ioffset; 305 if (irootloc) *irootloc = bas->irootloc; 306 PetscFunctionReturn(0); 307 } 308 309 /* An optimized PetscSFCreateEmbeddedRootSF. We aggresively make use of the established communication on sf. 310 We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[] 311 was sorted before calling the routine. 312 */ 313 PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedRootSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 314 { 315 PetscSF esf; 316 PetscInt esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote; 317 PetscInt i,j,p,q,nroots,esf_nleaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal; 318 char *rootdata,*leafdata,*leafmem; /* Only stores 0 or 1, so we can save memory with char */ 319 PetscMPIInt *esf_ranks; 320 const PetscMPIInt *ranks,*iranks; 321 const PetscInt *roffset,*rmine,*rremote,*ioffset,*irootloc; 322 PetscBool connected; 323 PetscSFNode *new_iremote; 324 PetscSF_Basic *bas; 325 PetscErrorCode ierr; 326 327 PetscFunctionBegin; 328 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr); 329 ierr = PetscSFSetFromOptions(esf);CHKERRQ(ierr); 330 ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */ 331 332 /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */ 333 ierr = PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);CHKERRQ(ierr); 334 ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 335 maxlocal = maxleaf - minleaf + 1; 336 ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr); 337 leafdata = leafmem - minleaf; 338 /* Tag selected roots */ 339 for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1; 340 341 ierr = PetscSFBcastBegin(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 342 ierr = PetscSFBcastEnd(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 343 ierr = PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr); /* Get send info */ 344 esf_nranks = esf_ndranks = esf_nleaves = 0; 345 for (i=0; i<nranks; i++) { 346 connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */ 347 for (j=roffset[i]; j<roffset[i+1]; j++) {if (leafdata[rmine[j]]) {esf_nleaves++; connected = PETSC_TRUE;}} 348 if (connected) {esf_nranks++; if (i < ndranks) esf_ndranks++;} 349 } 350 351 /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */ 352 ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr); 353 ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr); 354 ierr = PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,esf_nleaves,&esf_rmine,esf_nleaves,&esf_rremote);CHKERRQ(ierr); 355 p = 0; /* Counter for connected root ranks */ 356 q = 0; /* Counter for connected leaves */ 357 esf_roffset[0] = 0; 358 for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */ 359 connected = PETSC_FALSE; 360 for (j=roffset[i]; j<roffset[i+1]; j++) { 361 if (leafdata[rmine[j]]) { 362 esf_rmine[q] = new_ilocal[q] = rmine[j]; 363 esf_rremote[q] = rremote[j]; 364 new_iremote[q].index = rremote[j]; 365 new_iremote[q].rank = ranks[i]; 366 connected = PETSC_TRUE; 367 q++; 368 } 369 } 370 if (connected) { 371 esf_ranks[p] = ranks[i]; 372 esf_roffset[p+1] = q; 373 p++; 374 } 375 } 376 377 /* SetGraph internally resets the SF, so we only set its fields after the call */ 378 ierr = PetscSFSetGraph(esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 379 esf->nranks = esf_nranks; 380 esf->ndranks = esf_ndranks; 381 esf->ranks = esf_ranks; 382 esf->roffset = esf_roffset; 383 esf->rmine = esf_rmine; 384 esf->rremote = esf_rremote; 385 esf->nleafreqs = esf_nranks - esf_ndranks; 386 387 /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */ 388 bas = (PetscSF_Basic*)esf->data; 389 ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr); /* Get recv info */ 390 /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we 391 we do not care since these arrays are usually short. The benefit is we can fill these arrays by just parsing irootloc once. 392 */ 393 ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr); 394 ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr); 395 bas->niranks = bas->ndiranks = bas->ioffset[0] = 0; 396 p = 0; /* Counter for connected leaf ranks */ 397 q = 0; /* Counter for connected roots */ 398 for (i=0; i<niranks; i++) { 399 connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */ 400 for (j=ioffset[i]; j<ioffset[i+1]; j++) { 401 if (rootdata[irootloc[j]]) { 402 bas->irootloc[q++] = irootloc[j]; 403 connected = PETSC_TRUE; 404 } 405 } 406 if (connected) { 407 bas->niranks++; 408 if (i<ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */ 409 bas->iranks[p] = iranks[i]; 410 bas->ioffset[p+1] = q; 411 p++; 412 } 413 } 414 bas->itotal = q; 415 bas->nrootreqs = bas->niranks - bas->ndiranks; 416 esf->persistent = PETSC_TRUE; 417 /* Setup packing related fields */ 418 ierr = PetscSFSetUpPackFields(esf);CHKERRQ(ierr); 419 420 /* Copy from PetscSFSetUp(), since this method wants to skip PetscSFSetUp(). */ 421 #if defined(PETSC_HAVE_CUDA) 422 if (esf->backend == PETSCSF_BACKEND_CUDA) { 423 esf->ops->Malloc = PetscSFMalloc_CUDA; 424 esf->ops->Free = PetscSFFree_CUDA; 425 } 426 #endif 427 428 #if defined(PETSC_HAVE_HIP) 429 /* TODO: Needs debugging */ 430 if (esf->backend == PETSCSF_BACKEND_HIP) { 431 esf->ops->Malloc = PetscSFMalloc_HIP; 432 esf->ops->Free = PetscSFFree_HIP; 433 } 434 #endif 435 436 #if defined(PETSC_HAVE_KOKKOS) 437 if (esf->backend == PETSCSF_BACKEND_KOKKOS) { 438 esf->ops->Malloc = PetscSFMalloc_Kokkos; 439 esf->ops->Free = PetscSFFree_Kokkos; 440 } 441 #endif 442 esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */ 443 ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr); 444 *newsf = esf; 445 PetscFunctionReturn(0); 446 } 447 448 PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf) 449 { 450 PetscSF_Basic *dat; 451 PetscErrorCode ierr; 452 453 PetscFunctionBegin; 454 sf->ops->SetUp = PetscSFSetUp_Basic; 455 sf->ops->Reset = PetscSFReset_Basic; 456 sf->ops->Destroy = PetscSFDestroy_Basic; 457 sf->ops->View = PetscSFView_Basic; 458 sf->ops->BcastBegin = PetscSFBcastBegin_Basic; 459 sf->ops->BcastEnd = PetscSFBcastEnd_Basic; 460 sf->ops->ReduceBegin = PetscSFReduceBegin_Basic; 461 sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 462 sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Basic; 463 sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Basic; 464 sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Basic; 465 sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic; 466 467 ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr); 468 sf->data = (void*)dat; 469 PetscFunctionReturn(0); 470 } 471