xref: /petsc/src/vec/is/sf/impls/basic/sfbasic.c (revision 00816365229eff0fcb3567887feb4c4abe2b0e86)
18cd53115SBarry Smith 
240e23c03SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfbasic.h>
3cd620004SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfpack.h>
4b23bfdefSJunchao Zhang 
540e23c03SJunchao Zhang /*===================================================================================*/
640e23c03SJunchao Zhang /*              SF public interface implementations                                  */
740e23c03SJunchao Zhang /*===================================================================================*/
840e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
995fce210SBarry Smith {
1095fce210SBarry Smith   PetscErrorCode ierr;
11b23bfdefSJunchao Zhang   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
1295fce210SBarry Smith   PetscInt       *rlengths,*ilengths,i;
1340e23c03SJunchao Zhang   PetscMPIInt    rank,niranks,*iranks,tag;
1495fce210SBarry Smith   MPI_Comm       comm;
15b5a8e515SJed Brown   MPI_Group      group;
1640e23c03SJunchao Zhang   MPI_Request    *rootreqs,*leafreqs;
1795fce210SBarry Smith 
1895fce210SBarry Smith   PetscFunctionBegin;
19b5a8e515SJed Brown   ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr);
20b5a8e515SJed Brown   ierr = PetscSFSetUpRanks(sf,group);CHKERRQ(ierr);
21b5a8e515SJed Brown   ierr = MPI_Group_free(&group);CHKERRQ(ierr);
2295fce210SBarry Smith   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
2340e23c03SJunchao Zhang   ierr = PetscObjectGetNewTag((PetscObject)sf,&tag);CHKERRQ(ierr);
24c943f53fSJed Brown   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2595fce210SBarry Smith   /*
2695fce210SBarry Smith    * Inform roots about how many leaves and from which ranks
2795fce210SBarry Smith    */
28785e854fSJed Brown   ierr = PetscMalloc1(sf->nranks,&rlengths);CHKERRQ(ierr);
29cd620004SJunchao Zhang   /* Determine number, sending ranks and length of incoming */
3095fce210SBarry Smith   for (i=0; i<sf->nranks; i++) {
3195fce210SBarry Smith     rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */
3295fce210SBarry Smith   }
3340e23c03SJunchao Zhang   ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);CHKERRQ(ierr);
34c943f53fSJed Brown 
350b899082SJunchao Zhang   /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why.
360b899082SJunchao Zhang      We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is
370b899082SJunchao Zhang      small and the sorting is cheap.
380b899082SJunchao Zhang    */
390b899082SJunchao Zhang   ierr = PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);CHKERRQ(ierr);
400b899082SJunchao Zhang 
41c943f53fSJed Brown   /* Partition into distinguished and non-distinguished incoming ranks */
42c943f53fSJed Brown   bas->ndiranks = sf->ndranks;
43c943f53fSJed Brown   bas->niranks = bas->ndiranks + niranks;
44c943f53fSJed Brown   ierr = PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);CHKERRQ(ierr);
45c943f53fSJed Brown   bas->ioffset[0] = 0;
46c943f53fSJed Brown   for (i=0; i<bas->ndiranks; i++) {
47c943f53fSJed Brown     bas->iranks[i] = sf->ranks[i];
48c943f53fSJed Brown     bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i];
49c943f53fSJed Brown   }
5040e23c03SJunchao Zhang   if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks");
5140e23c03SJunchao Zhang   for ( ; i<bas->niranks; i++) {
52c943f53fSJed Brown     bas->iranks[i] = iranks[i-bas->ndiranks];
53c943f53fSJed Brown     bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks];
54c943f53fSJed Brown   }
55c943f53fSJed Brown   bas->itotal = bas->ioffset[i];
5695fce210SBarry Smith   ierr = PetscFree(rlengths);CHKERRQ(ierr);
57c943f53fSJed Brown   ierr = PetscFree(iranks);CHKERRQ(ierr);
58c943f53fSJed Brown   ierr = PetscFree(ilengths);CHKERRQ(ierr);
5995fce210SBarry Smith 
6095fce210SBarry Smith   /* Send leaf identities to roots */
61c943f53fSJed Brown   ierr = PetscMalloc1(bas->itotal,&bas->irootloc);CHKERRQ(ierr);
6240e23c03SJunchao Zhang   ierr = PetscMalloc2(bas->niranks-bas->ndiranks,&rootreqs,sf->nranks-sf->ndranks,&leafreqs);CHKERRQ(ierr);
6340e23c03SJunchao Zhang   for (i=bas->ndiranks; i<bas->niranks; i++) {
6440e23c03SJunchao Zhang     ierr = MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);CHKERRQ(ierr);
6540e23c03SJunchao Zhang   }
6640e23c03SJunchao Zhang   for (i=0; i<sf->nranks; i++) {
6795fce210SBarry Smith     PetscMPIInt npoints;
6895fce210SBarry Smith     ierr = PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);CHKERRQ(ierr);
6940e23c03SJunchao Zhang     if (i < sf->ndranks) {
7040e23c03SJunchao Zhang       if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank");
7140e23c03SJunchao Zhang       if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank");
7240e23c03SJunchao Zhang       if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths");
7340e23c03SJunchao Zhang       ierr = PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);CHKERRQ(ierr);
74c943f53fSJed Brown       continue;
75c943f53fSJed Brown     }
7640e23c03SJunchao Zhang     ierr = MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);CHKERRQ(ierr);
77bf39f1bfSJed Brown   }
7840e23c03SJunchao Zhang   ierr = MPI_Waitall(bas->niranks-bas->ndiranks,rootreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
7940e23c03SJunchao Zhang   ierr = MPI_Waitall(sf->nranks-sf->ndranks,leafreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
8040e23c03SJunchao Zhang   ierr = PetscFree2(rootreqs,leafreqs);CHKERRQ(ierr);
8195fce210SBarry Smith 
82cd620004SJunchao Zhang   sf->nleafreqs  = sf->nranks - sf->ndranks;
83cd620004SJunchao Zhang   bas->nrootreqs = bas->niranks - bas->ndiranks;
84cd620004SJunchao Zhang   sf->persistent = PETSC_TRUE;
85eb02082bSJunchao Zhang 
86cd620004SJunchao Zhang   /* Setup fields related to packing */
87cd620004SJunchao Zhang   ierr = PetscSFSetUpPackFields(sf);CHKERRQ(ierr);
8895fce210SBarry Smith   PetscFunctionReturn(0);
8995fce210SBarry Smith }
9095fce210SBarry Smith 
9140e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
9295fce210SBarry Smith {
9395fce210SBarry Smith   PetscErrorCode    ierr;
94cd620004SJunchao Zhang   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
9595fce210SBarry Smith 
9695fce210SBarry Smith   PetscFunctionBegin;
9729046d53SLisandro Dalcin   if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
98c943f53fSJed Brown   ierr = PetscFree2(bas->iranks,bas->ioffset);CHKERRQ(ierr);
99c943f53fSJed Brown   ierr = PetscFree(bas->irootloc);CHKERRQ(ierr);
100eb02082bSJunchao Zhang #if defined(PETSC_HAVE_CUDA)
101cd620004SJunchao Zhang   {
102cd620004SJunchao Zhang   PetscInt  i;
103cd620004SJunchao Zhang   for (i=0; i<2; i++) {if (bas->irootloc_d[i]) {cudaError_t err = cudaFree(bas->irootloc_d[i]);CHKERRCUDA(err);bas->irootloc_d[i]=NULL;}}
104cd620004SJunchao Zhang   }
105eb02082bSJunchao Zhang #endif
106cd620004SJunchao Zhang   ierr = PetscSFLinkDestroy(sf,&bas->avail);CHKERRQ(ierr);
107cd620004SJunchao Zhang   ierr = PetscSFResetPackFields(sf);CHKERRQ(ierr);
10895fce210SBarry Smith   PetscFunctionReturn(0);
10995fce210SBarry Smith }
11095fce210SBarry Smith 
11140e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
11295fce210SBarry Smith {
11395fce210SBarry Smith   PetscErrorCode ierr;
11495fce210SBarry Smith 
11595fce210SBarry Smith   PetscFunctionBegin;
116f6d956f6SStefano Zampini   ierr = PetscSFReset_Basic(sf);CHKERRQ(ierr);
11795fce210SBarry Smith   ierr = PetscFree(sf->data);CHKERRQ(ierr);
11895fce210SBarry Smith   PetscFunctionReturn(0);
11995fce210SBarry Smith }
12095fce210SBarry Smith 
12140e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer)
12295fce210SBarry Smith {
12395fce210SBarry Smith   PetscErrorCode ierr;
12495fce210SBarry Smith   PetscBool      iascii;
12595fce210SBarry Smith 
12695fce210SBarry Smith   PetscFunctionBegin;
12795fce210SBarry Smith   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
128b23bfdefSJunchao Zhang   if (iascii) {ierr = PetscViewerASCIIPrintf(viewer,"  sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);}
12995fce210SBarry Smith   PetscFunctionReturn(0);
13095fce210SBarry Smith }
13195fce210SBarry Smith 
132eb02082bSJunchao Zhang static PetscErrorCode PetscSFBcastAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
13395fce210SBarry Smith {
13495fce210SBarry Smith   PetscErrorCode    ierr;
135cd620004SJunchao Zhang   PetscSFLink       link = NULL;
136851d6770SJunchao Zhang   MPI_Request       *rootreqs = NULL,*leafreqs = NULL;
137cd620004SJunchao Zhang   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
13895fce210SBarry Smith 
13995fce210SBarry Smith   PetscFunctionBegin;
140cd620004SJunchao Zhang   /* Create a communication link, which provides buffers & MPI requests etc */
141cd620004SJunchao Zhang   ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link);CHKERRQ(ierr);
142cd620004SJunchao Zhang   /* Get MPI requests from the link. We do not need buffers explicitly since we use persistent MPI */
143cd620004SJunchao Zhang   ierr = PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,NULL,NULL,&rootreqs,&leafreqs);CHKERRQ(ierr);
144cd620004SJunchao Zhang   /* Post Irecv for remote */
145cd620004SJunchao Zhang   ierr = MPI_Startall_irecv(sf->leafbuflen[PETSCSF_REMOTE],unit,sf->nleafreqs,leafreqs);CHKERRQ(ierr);
146cd620004SJunchao Zhang   /* Pack rootdata and do Isend for remote */
147cd620004SJunchao Zhang   ierr = PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata);CHKERRQ(ierr);
148cd620004SJunchao Zhang   ierr = MPI_Startall_isend(bas->rootbuflen[PETSCSF_REMOTE],unit,bas->nrootreqs,rootreqs);CHKERRQ(ierr);
149cd620004SJunchao Zhang   /* Do local BcastAndOp, which overlaps with the irecv/isend above */
150cd620004SJunchao Zhang   ierr = PetscSFLinkBcastAndOpLocal(sf,link,rootdata,leafdata,op);CHKERRQ(ierr);
15195fce210SBarry Smith   PetscFunctionReturn(0);
15295fce210SBarry Smith }
15395fce210SBarry Smith 
154*00816365SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
15595fce210SBarry Smith {
15695fce210SBarry Smith   PetscErrorCode    ierr;
157cd620004SJunchao Zhang   PetscSFLink       link = NULL;
15895fce210SBarry Smith 
15995fce210SBarry Smith   PetscFunctionBegin;
160cd620004SJunchao Zhang   /* Retrieve the link used in XxxBegin() with root/leafdata as key */
161cd620004SJunchao Zhang   ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
162cd620004SJunchao Zhang   /* Wait for the completion of mpi */
163cd620004SJunchao Zhang   ierr = PetscSFLinkMPIWaitall(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
164cd620004SJunchao Zhang   /* Unpack leafdata and reclaim the link */
165cd620004SJunchao Zhang   ierr = PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafdata,op);CHKERRQ(ierr);
166cd620004SJunchao Zhang   ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr);
167cd620004SJunchao Zhang   PetscFunctionReturn(0);
168cd620004SJunchao Zhang }
169cd620004SJunchao Zhang 
170cd620004SJunchao Zhang /* Shared by ReduceBegin and FetchAndOpBegin */
171cd620004SJunchao Zhang PETSC_STATIC_INLINE PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op,PetscSFOperation sfop,PetscSFLink *out)
172cd620004SJunchao Zhang {
173cd620004SJunchao Zhang   PetscErrorCode    ierr;
174cd620004SJunchao Zhang   PetscSFLink       link;
175cd620004SJunchao Zhang   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
176cd620004SJunchao Zhang   MPI_Request       *rootreqs = NULL,*leafreqs = NULL;
177cd620004SJunchao Zhang 
178cd620004SJunchao Zhang   PetscFunctionBegin;
179cd620004SJunchao Zhang   ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,sfop,&link);CHKERRQ(ierr);
180cd620004SJunchao Zhang   ierr = PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_LEAF2ROOT,NULL,NULL,&rootreqs,&leafreqs);CHKERRQ(ierr);
181cd620004SJunchao Zhang   ierr = MPI_Startall_irecv(bas->rootbuflen[PETSCSF_REMOTE],unit,bas->nrootreqs,rootreqs);CHKERRQ(ierr);
182cd620004SJunchao Zhang   ierr = PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata);CHKERRQ(ierr);
183cd620004SJunchao Zhang   ierr = MPI_Startall_isend(sf->leafbuflen[PETSCSF_REMOTE],unit,sf->nleafreqs,leafreqs);CHKERRQ(ierr);
184cd620004SJunchao Zhang   *out = link;
18595fce210SBarry Smith   PetscFunctionReturn(0);
18695fce210SBarry Smith }
18795fce210SBarry Smith 
18895fce210SBarry Smith /* leaf -> root with reduction */
189eb02082bSJunchao Zhang static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
19095fce210SBarry Smith {
19195fce210SBarry Smith   PetscErrorCode    ierr;
192cd620004SJunchao Zhang   PetscSFLink       link = NULL;
19395fce210SBarry Smith 
19495fce210SBarry Smith   PetscFunctionBegin;
195cd620004SJunchao Zhang   ierr = PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_REDUCE,&link);CHKERRQ(ierr);
196cd620004SJunchao Zhang   ierr = PetscSFLinkReduceLocal(sf,link,leafdata,rootdata,op);CHKERRQ(ierr);
19795fce210SBarry Smith   PetscFunctionReturn(0);
19895fce210SBarry Smith }
19995fce210SBarry Smith 
200*00816365SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
20195fce210SBarry Smith {
20295fce210SBarry Smith   PetscErrorCode    ierr;
203cd620004SJunchao Zhang   PetscSFLink       link = NULL;
20495fce210SBarry Smith 
20595fce210SBarry Smith   PetscFunctionBegin;
206cd620004SJunchao Zhang   ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
207cd620004SJunchao Zhang   ierr = PetscSFLinkMPIWaitall(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr);
208cd620004SJunchao Zhang   ierr = PetscSFLinkUnpackRootData(sf,link,PETSCSF_REMOTE,rootdata,op);CHKERRQ(ierr);
209cd620004SJunchao Zhang   ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr);
21095fce210SBarry Smith   PetscFunctionReturn(0);
21195fce210SBarry Smith }
21295fce210SBarry Smith 
213eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
21495fce210SBarry Smith {
21595fce210SBarry Smith   PetscErrorCode    ierr;
216cd620004SJunchao Zhang   PetscSFLink       link = NULL;
21795fce210SBarry Smith 
21895fce210SBarry Smith   PetscFunctionBegin;
219cd620004SJunchao Zhang   ierr = PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_FETCH,&link);CHKERRQ(ierr);
220cd620004SJunchao Zhang   ierr = PetscSFLinkFetchAndOpLocal(sf,link,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
22195fce210SBarry Smith   PetscFunctionReturn(0);
22295fce210SBarry Smith }
22395fce210SBarry Smith 
224*00816365SJunchao Zhang static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
22595fce210SBarry Smith {
22695fce210SBarry Smith   PetscErrorCode    ierr;
227cd620004SJunchao Zhang   PetscSFLink       link = NULL;
228851d6770SJunchao Zhang   MPI_Request       *rootreqs = NULL,*leafreqs = NULL;
229cd620004SJunchao Zhang   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
23095fce210SBarry Smith 
23195fce210SBarry Smith   PetscFunctionBegin;
232cd620004SJunchao Zhang   ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
23395fce210SBarry Smith   /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
234cd620004SJunchao Zhang   ierr = PetscSFLinkMPIWaitall(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr);
235cd620004SJunchao Zhang   /* Do fetch-and-op, the (remote) update results are in rootbuf */
236cd620004SJunchao Zhang   ierr = PetscSFLinkFetchRootData(sf,link,PETSCSF_REMOTE,rootdata,op);CHKERRQ(ierr);
23740e23c03SJunchao Zhang 
238cd620004SJunchao Zhang   /* Bcast rootbuf to leafupdate */
239cd620004SJunchao Zhang   ierr = PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,NULL,NULL,&rootreqs,&leafreqs);CHKERRQ(ierr);
240cd620004SJunchao Zhang   /* Post leaf receives and root sends */
241cd620004SJunchao Zhang   ierr = MPI_Startall_irecv(sf->leafbuflen[PETSCSF_REMOTE],unit,sf->nleafreqs,leafreqs);CHKERRQ(ierr);
242cd620004SJunchao Zhang   ierr = MPI_Startall_isend(bas->rootbuflen[PETSCSF_REMOTE],unit,bas->nrootreqs,rootreqs);CHKERRQ(ierr);
243b23bfdefSJunchao Zhang   /* Unpack and insert fetched data into leaves */
244cd620004SJunchao Zhang   ierr = PetscSFLinkMPIWaitall(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
245cd620004SJunchao Zhang   ierr = PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafupdate,MPIU_REPLACE);CHKERRQ(ierr);
246cd620004SJunchao Zhang   ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr);
24795fce210SBarry Smith   PetscFunctionReturn(0);
24895fce210SBarry Smith }
24995fce210SBarry Smith 
25040e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
2518750ddebSJunchao Zhang {
2528750ddebSJunchao Zhang   PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
2538750ddebSJunchao Zhang 
2548750ddebSJunchao Zhang   PetscFunctionBegin;
2558750ddebSJunchao Zhang   if (niranks)  *niranks  = bas->niranks;
2568750ddebSJunchao Zhang   if (iranks)   *iranks   = bas->iranks;
2578750ddebSJunchao Zhang   if (ioffset)  *ioffset  = bas->ioffset;
2588750ddebSJunchao Zhang   if (irootloc) *irootloc = bas->irootloc;
2598750ddebSJunchao Zhang   PetscFunctionReturn(0);
2608750ddebSJunchao Zhang }
2618750ddebSJunchao Zhang 
262f659e5c7SJunchao Zhang /* An optimized PetscSFCreateEmbeddedSF. We aggresively make use of the established communication on sf.
263f659e5c7SJunchao Zhang    We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[]
264f659e5c7SJunchao Zhang    was sorted before calling the routine.
265f659e5c7SJunchao Zhang  */
266f659e5c7SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
267f659e5c7SJunchao Zhang {
268f659e5c7SJunchao Zhang   PetscSF           esf;
269cd620004SJunchao Zhang   PetscInt          esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote;
270cd620004SJunchao Zhang   PetscInt          i,j,p,q,nroots,esf_nleaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal;
271cd620004SJunchao Zhang   char              *rootdata,*leafdata,*leafmem; /* Only stores 0 or 1, so we can save memory with char */
272f659e5c7SJunchao Zhang   PetscMPIInt       *esf_ranks;
273f659e5c7SJunchao Zhang   const PetscMPIInt *ranks,*iranks;
274cd620004SJunchao Zhang   const PetscInt    *roffset,*rmine,*rremote,*ioffset,*irootloc;
275f659e5c7SJunchao Zhang   PetscBool         connected;
276f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
277f659e5c7SJunchao Zhang   PetscSF_Basic     *bas;
278f659e5c7SJunchao Zhang   PetscErrorCode    ierr;
279f659e5c7SJunchao Zhang 
280f659e5c7SJunchao Zhang   PetscFunctionBegin;
281f659e5c7SJunchao Zhang   ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr);
282f659e5c7SJunchao Zhang   ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */
283f659e5c7SJunchao Zhang 
284cd620004SJunchao Zhang   /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */
285f659e5c7SJunchao Zhang   ierr = PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);CHKERRQ(ierr);
286f659e5c7SJunchao Zhang   ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
287cd620004SJunchao Zhang   maxlocal = maxleaf - minleaf + 1;
288cd620004SJunchao Zhang   ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr);
289cd620004SJunchao Zhang   leafdata = leafmem - minleaf;
290f659e5c7SJunchao Zhang   /* Tag selected roots */
291f659e5c7SJunchao Zhang   for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1;
292f659e5c7SJunchao Zhang 
293cd620004SJunchao Zhang   ierr = PetscSFBcastBegin(sf,MPI_CHAR,rootdata,leafdata);CHKERRQ(ierr);
294cd620004SJunchao Zhang   ierr = PetscSFBcastEnd(sf,MPI_CHAR,rootdata,leafdata);CHKERRQ(ierr);
295f659e5c7SJunchao Zhang   ierr = PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr); /* Get send info */
296cd620004SJunchao Zhang   esf_nranks = esf_ndranks = esf_nleaves = 0;
297b23bfdefSJunchao Zhang   for (i=0; i<nranks; i++) {
298cd620004SJunchao Zhang     connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */
299cd620004SJunchao Zhang     for (j=roffset[i]; j<roffset[i+1]; j++) {if (leafdata[rmine[j]]) {esf_nleaves++; connected = PETSC_TRUE;}}
300f659e5c7SJunchao Zhang     if (connected) {esf_nranks++; if (i < ndranks) esf_ndranks++;}
301f659e5c7SJunchao Zhang   }
302f659e5c7SJunchao Zhang 
303f659e5c7SJunchao Zhang   /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
304cd620004SJunchao Zhang   ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr);
305cd620004SJunchao Zhang   ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr);
306cd620004SJunchao Zhang   ierr = PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,esf_nleaves,&esf_rmine,esf_nleaves,&esf_rremote);CHKERRQ(ierr);
307f659e5c7SJunchao Zhang   p    = 0; /* Counter for connected root ranks */
308f659e5c7SJunchao Zhang   q    = 0; /* Counter for connected leaves */
309f659e5c7SJunchao Zhang   esf_roffset[0] = 0;
310f659e5c7SJunchao Zhang   for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */
311f659e5c7SJunchao Zhang     connected = PETSC_FALSE;
312cd620004SJunchao Zhang     for (j=roffset[i]; j<roffset[i+1]; j++) {
313cd620004SJunchao Zhang       if (leafdata[rmine[j]]) {
314f659e5c7SJunchao Zhang         esf_rmine[q]         = new_ilocal[q] = rmine[j];
315f659e5c7SJunchao Zhang         esf_rremote[q]       = rremote[j];
316f659e5c7SJunchao Zhang         new_iremote[q].index = rremote[j];
317f659e5c7SJunchao Zhang         new_iremote[q].rank  = ranks[i];
318f659e5c7SJunchao Zhang         connected            = PETSC_TRUE;
319f659e5c7SJunchao Zhang         q++;
320f659e5c7SJunchao Zhang       }
321f659e5c7SJunchao Zhang     }
322f659e5c7SJunchao Zhang     if (connected) {
323f659e5c7SJunchao Zhang       esf_ranks[p]     = ranks[i];
324f659e5c7SJunchao Zhang       esf_roffset[p+1] = q;
325f659e5c7SJunchao Zhang       p++;
326f659e5c7SJunchao Zhang     }
327f659e5c7SJunchao Zhang   }
328f659e5c7SJunchao Zhang 
329f659e5c7SJunchao Zhang   /* SetGraph internally resets the SF, so we only set its fields after the call */
330cd620004SJunchao Zhang   ierr           = PetscSFSetGraph(esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
331f659e5c7SJunchao Zhang   esf->nranks    = esf_nranks;
332f659e5c7SJunchao Zhang   esf->ndranks   = esf_ndranks;
333f659e5c7SJunchao Zhang   esf->ranks     = esf_ranks;
334f659e5c7SJunchao Zhang   esf->roffset   = esf_roffset;
335f659e5c7SJunchao Zhang   esf->rmine     = esf_rmine;
336f659e5c7SJunchao Zhang   esf->rremote   = esf_rremote;
337cd620004SJunchao Zhang   esf->nleafreqs = esf_nranks - esf_ndranks;
338f659e5c7SJunchao Zhang 
339f659e5c7SJunchao Zhang   /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
340f659e5c7SJunchao Zhang   bas  = (PetscSF_Basic*)esf->data;
341f659e5c7SJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr); /* Get recv info */
342f659e5c7SJunchao Zhang   /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we
343cd620004SJunchao Zhang      we do not care since these arrays are usually short. The benefit is we can fill these arrays by just parsing irootloc once.
344f659e5c7SJunchao Zhang    */
345f659e5c7SJunchao Zhang   ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr);
346f659e5c7SJunchao Zhang   ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr);
347f659e5c7SJunchao Zhang   bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
348f659e5c7SJunchao Zhang   p = 0; /* Counter for connected leaf ranks */
349f659e5c7SJunchao Zhang   q = 0; /* Counter for connected roots */
350f659e5c7SJunchao Zhang   for (i=0; i<niranks; i++) {
351f659e5c7SJunchao Zhang     connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
352f659e5c7SJunchao Zhang     for (j=ioffset[i]; j<ioffset[i+1]; j++) {
353cd620004SJunchao Zhang       if (rootdata[irootloc[j]]) {
354f659e5c7SJunchao Zhang         bas->irootloc[q++] = irootloc[j];
355f659e5c7SJunchao Zhang         connected = PETSC_TRUE;
356f659e5c7SJunchao Zhang       }
357f659e5c7SJunchao Zhang     }
358f659e5c7SJunchao Zhang     if (connected) {
359f659e5c7SJunchao Zhang       bas->niranks++;
360f659e5c7SJunchao Zhang       if (i<ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */
361f659e5c7SJunchao Zhang       bas->iranks[p]    = iranks[i];
362f659e5c7SJunchao Zhang       bas->ioffset[p+1] = q;
363f659e5c7SJunchao Zhang       p++;
364f659e5c7SJunchao Zhang     }
365f659e5c7SJunchao Zhang   }
366f659e5c7SJunchao Zhang   bas->itotal     = q;
367cd620004SJunchao Zhang   bas->nrootreqs  = bas->niranks - bas->ndiranks;
368cd620004SJunchao Zhang   esf->persistent = PETSC_TRUE;
369cd620004SJunchao Zhang   /* Setup packing related fields */
370cd620004SJunchao Zhang   ierr = PetscSFSetUpPackFields(esf);CHKERRQ(ierr);
371f659e5c7SJunchao Zhang 
372f659e5c7SJunchao Zhang   esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
373cd620004SJunchao Zhang   ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr);
374f659e5c7SJunchao Zhang   *newsf = esf;
375f659e5c7SJunchao Zhang   PetscFunctionReturn(0);
376f659e5c7SJunchao Zhang }
377f659e5c7SJunchao Zhang 
3788cc058d9SJed Brown PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
37995fce210SBarry Smith {
38040e23c03SJunchao Zhang   PetscSF_Basic  *dat;
38195fce210SBarry Smith   PetscErrorCode ierr;
38295fce210SBarry Smith 
38395fce210SBarry Smith   PetscFunctionBegin;
38495fce210SBarry Smith   sf->ops->SetUp                = PetscSFSetUp_Basic;
38595fce210SBarry Smith   sf->ops->Reset                = PetscSFReset_Basic;
38695fce210SBarry Smith   sf->ops->Destroy              = PetscSFDestroy_Basic;
38795fce210SBarry Smith   sf->ops->View                 = PetscSFView_Basic;
3883482bfa8SJunchao Zhang   sf->ops->BcastAndOpBegin      = PetscSFBcastAndOpBegin_Basic;
3893482bfa8SJunchao Zhang   sf->ops->BcastAndOpEnd        = PetscSFBcastAndOpEnd_Basic;
39095fce210SBarry Smith   sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
39195fce210SBarry Smith   sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
39295fce210SBarry Smith   sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
39395fce210SBarry Smith   sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
3948750ddebSJunchao Zhang   sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
395f659e5c7SJunchao Zhang   sf->ops->CreateEmbeddedSF     = PetscSFCreateEmbeddedSF_Basic;
39695fce210SBarry Smith 
39740e23c03SJunchao Zhang   ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr);
39840e23c03SJunchao Zhang   sf->data = (void*)dat;
39995fce210SBarry Smith   PetscFunctionReturn(0);
40095fce210SBarry Smith }
401