xref: /petsc/src/vec/is/sf/impls/basic/sfbasic.c (revision 71438e86cf8dea9f708c08733b37fef8eb68dc06)
140e23c03SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfbasic.h>
2cd620004SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfpack.h>
3b23bfdefSJunchao Zhang 
440e23c03SJunchao Zhang /*===================================================================================*/
540e23c03SJunchao Zhang /*              SF public interface implementations                                  */
640e23c03SJunchao Zhang /*===================================================================================*/
740e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
895fce210SBarry Smith {
995fce210SBarry Smith   PetscErrorCode ierr;
10b23bfdefSJunchao Zhang   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
11*71438e86SJunchao Zhang   PetscInt       *rlengths,*ilengths,i,nRemoteRootRanks,nRemoteLeafRanks;
1240e23c03SJunchao Zhang   PetscMPIInt    rank,niranks,*iranks,tag;
1395fce210SBarry Smith   MPI_Comm       comm;
14b5a8e515SJed Brown   MPI_Group      group;
1540e23c03SJunchao Zhang   MPI_Request    *rootreqs,*leafreqs;
1695fce210SBarry Smith 
1795fce210SBarry Smith   PetscFunctionBegin;
18ffc4695bSBarry Smith   ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRMPI(ierr);
19b5a8e515SJed Brown   ierr = PetscSFSetUpRanks(sf,group);CHKERRQ(ierr);
20ffc4695bSBarry Smith   ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
2195fce210SBarry Smith   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
2240e23c03SJunchao Zhang   ierr = PetscObjectGetNewTag((PetscObject)sf,&tag);CHKERRQ(ierr);
23ffc4695bSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
2495fce210SBarry Smith   /*
2595fce210SBarry Smith    * Inform roots about how many leaves and from which ranks
2695fce210SBarry Smith    */
27785e854fSJed Brown   ierr = PetscMalloc1(sf->nranks,&rlengths);CHKERRQ(ierr);
28cd620004SJunchao Zhang   /* Determine number, sending ranks and length of incoming */
2995fce210SBarry Smith   for (i=0; i<sf->nranks; i++) {
3095fce210SBarry Smith     rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */
3195fce210SBarry Smith   }
32*71438e86SJunchao Zhang   nRemoteRootRanks = sf->nranks-sf->ndranks;
33*71438e86SJunchao Zhang   ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,nRemoteRootRanks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);CHKERRQ(ierr);
34c943f53fSJed Brown 
350b899082SJunchao Zhang   /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why.
360b899082SJunchao Zhang      We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is
370b899082SJunchao Zhang      small and the sorting is cheap.
380b899082SJunchao Zhang    */
390b899082SJunchao Zhang   ierr = PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);CHKERRQ(ierr);
400b899082SJunchao Zhang 
41c943f53fSJed Brown   /* Partition into distinguished and non-distinguished incoming ranks */
42c943f53fSJed Brown   bas->ndiranks = sf->ndranks;
43c943f53fSJed Brown   bas->niranks = bas->ndiranks + niranks;
44c943f53fSJed Brown   ierr = PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);CHKERRQ(ierr);
45c943f53fSJed Brown   bas->ioffset[0] = 0;
46c943f53fSJed Brown   for (i=0; i<bas->ndiranks; i++) {
47c943f53fSJed Brown     bas->iranks[i] = sf->ranks[i];
48c943f53fSJed Brown     bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i];
49c943f53fSJed Brown   }
5040e23c03SJunchao Zhang   if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks");
5140e23c03SJunchao Zhang   for (; i<bas->niranks; i++) {
52c943f53fSJed Brown     bas->iranks[i] = iranks[i-bas->ndiranks];
53c943f53fSJed Brown     bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks];
54c943f53fSJed Brown   }
55c943f53fSJed Brown   bas->itotal = bas->ioffset[i];
5695fce210SBarry Smith   ierr = PetscFree(rlengths);CHKERRQ(ierr);
57c943f53fSJed Brown   ierr = PetscFree(iranks);CHKERRQ(ierr);
58c943f53fSJed Brown   ierr = PetscFree(ilengths);CHKERRQ(ierr);
5995fce210SBarry Smith 
6095fce210SBarry Smith   /* Send leaf identities to roots */
61*71438e86SJunchao Zhang   nRemoteLeafRanks = bas->niranks-bas->ndiranks;
62c943f53fSJed Brown   ierr = PetscMalloc1(bas->itotal,&bas->irootloc);CHKERRQ(ierr);
63*71438e86SJunchao Zhang   ierr = PetscMalloc2(nRemoteLeafRanks,&rootreqs,nRemoteRootRanks,&leafreqs);CHKERRQ(ierr);
6440e23c03SJunchao Zhang   for (i=bas->ndiranks; i<bas->niranks; i++) {
65ffc4695bSBarry Smith     ierr = MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);CHKERRMPI(ierr);
6640e23c03SJunchao Zhang   }
6740e23c03SJunchao Zhang   for (i=0; i<sf->nranks; i++) {
6895fce210SBarry Smith     PetscMPIInt npoints;
6995fce210SBarry Smith     ierr = PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);CHKERRQ(ierr);
7040e23c03SJunchao Zhang     if (i < sf->ndranks) {
7140e23c03SJunchao Zhang       if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank");
7240e23c03SJunchao Zhang       if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank");
7340e23c03SJunchao Zhang       if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths");
7440e23c03SJunchao Zhang       ierr = PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);CHKERRQ(ierr);
75c943f53fSJed Brown       continue;
76c943f53fSJed Brown     }
77ffc4695bSBarry Smith     ierr = MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);CHKERRMPI(ierr);
78bf39f1bfSJed Brown   }
79*71438e86SJunchao Zhang   ierr = MPI_Waitall(nRemoteLeafRanks,rootreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
80*71438e86SJunchao Zhang   ierr = MPI_Waitall(nRemoteRootRanks,leafreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
8195fce210SBarry Smith 
82*71438e86SJunchao Zhang   sf->nleafreqs  = nRemoteRootRanks;
83*71438e86SJunchao Zhang   bas->nrootreqs = nRemoteLeafRanks;
84cd620004SJunchao Zhang   sf->persistent = PETSC_TRUE;
85eb02082bSJunchao Zhang 
86*71438e86SJunchao Zhang   /* Setup fields related to packing, such as rootbuflen[] */
87cd620004SJunchao Zhang   ierr = PetscSFSetUpPackFields(sf);CHKERRQ(ierr);
88*71438e86SJunchao Zhang   ierr = PetscFree2(rootreqs,leafreqs);CHKERRQ(ierr);
8995fce210SBarry Smith   PetscFunctionReturn(0);
9095fce210SBarry Smith }
9195fce210SBarry Smith 
9240e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
9395fce210SBarry Smith {
9495fce210SBarry Smith   PetscErrorCode    ierr;
95cd620004SJunchao Zhang   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
96*71438e86SJunchao Zhang   PetscSFLink       link = bas->avail,next;
9795fce210SBarry Smith 
9895fce210SBarry Smith   PetscFunctionBegin;
9929046d53SLisandro Dalcin   if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
100c943f53fSJed Brown   ierr = PetscFree2(bas->iranks,bas->ioffset);CHKERRQ(ierr);
101c943f53fSJed Brown   ierr = PetscFree(bas->irootloc);CHKERRQ(ierr);
102*71438e86SJunchao Zhang 
1037fd2d3dbSJunchao Zhang  #if defined(PETSC_HAVE_DEVICE)
10420c24465SJunchao Zhang   for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,bas->irootloc_d[i]);CHKERRQ(ierr);}
105eb02082bSJunchao Zhang  #endif
106*71438e86SJunchao Zhang 
107*71438e86SJunchao Zhang  #if defined(PETSC_HAVE_NVSHMEM)
108*71438e86SJunchao Zhang   ierr = PetscSFReset_Basic_NVSHMEM(sf);CHKERRQ(ierr);
109*71438e86SJunchao Zhang  #endif
110*71438e86SJunchao Zhang 
111*71438e86SJunchao Zhang   for (; link; link=next) {next = link->next; ierr = PetscSFLinkDestroy(sf,link);CHKERRQ(ierr);}
112*71438e86SJunchao Zhang   bas->avail = NULL;
113cd620004SJunchao Zhang   ierr = PetscSFResetPackFields(sf);CHKERRQ(ierr);
11495fce210SBarry Smith   PetscFunctionReturn(0);
11595fce210SBarry Smith }
11695fce210SBarry Smith 
11740e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
11895fce210SBarry Smith {
11995fce210SBarry Smith   PetscErrorCode ierr;
12095fce210SBarry Smith 
12195fce210SBarry Smith   PetscFunctionBegin;
122f6d956f6SStefano Zampini   ierr = PetscSFReset_Basic(sf);CHKERRQ(ierr);
12395fce210SBarry Smith   ierr = PetscFree(sf->data);CHKERRQ(ierr);
12495fce210SBarry Smith   PetscFunctionReturn(0);
12595fce210SBarry Smith }
12695fce210SBarry Smith 
12762152dedSBarry Smith #if defined(PETSC_USE_SINGLE_LIBRARY)
12862152dedSBarry Smith #include <petscmat.h>
12962152dedSBarry Smith 
13062152dedSBarry Smith PETSC_INTERN PetscErrorCode PetscSFView_Basic_PatternAndSizes(PetscSF sf,PetscViewer viewer)
13162152dedSBarry Smith {
13262152dedSBarry Smith   PetscErrorCode       ierr;
13362152dedSBarry Smith   PetscSF_Basic        *bas = (PetscSF_Basic*)sf->data;
13462152dedSBarry Smith   PetscSFLink          link = bas->avail;
13562152dedSBarry Smith   PetscInt             i,nrootranks,ndrootranks,myrank;
13662152dedSBarry Smith   const PetscInt       *rootoffset;
13762152dedSBarry Smith   PetscMPIInt          rank,size;
13862152dedSBarry Smith   MPI_Comm             comm = PetscObjectComm((PetscObject)sf);
13962152dedSBarry Smith   Mat                  A;
14062152dedSBarry Smith 
14162152dedSBarry Smith   PetscFunctionBegin;
14262152dedSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
14362152dedSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
14462152dedSBarry Smith   myrank = rank;
14562152dedSBarry Smith   if (sf->persistent) {
14662152dedSBarry Smith     /* amount of data I send to other ranks - global to local */
14762152dedSBarry Smith     ierr = MatCreateAIJ(comm,1,1,size,size,20,NULL,20,NULL,&A);CHKERRQ(ierr);
14862152dedSBarry Smith     ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
14962152dedSBarry Smith     for (i=0; i<nrootranks; i++) {
15062152dedSBarry Smith       ierr = MatSetValue(A,myrank,bas->iranks[i],(rootoffset[i+1] - rootoffset[i])*link->unitbytes,INSERT_VALUES);CHKERRQ(ierr);
15162152dedSBarry Smith     }
15262152dedSBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
15362152dedSBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
15462152dedSBarry Smith     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
15562152dedSBarry Smith     ierr = MatView(A,viewer);CHKERRQ(ierr);
15662152dedSBarry Smith     ierr = MatDestroy(&A);CHKERRQ(ierr);
15762152dedSBarry Smith   }
15862152dedSBarry Smith   PetscFunctionReturn(0);
15962152dedSBarry Smith }
16062152dedSBarry Smith #endif
16162152dedSBarry Smith 
16240e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer)
16395fce210SBarry Smith {
16495fce210SBarry Smith   PetscErrorCode ierr;
16595fce210SBarry Smith   PetscBool      iascii;
16695fce210SBarry Smith 
16795fce210SBarry Smith   PetscFunctionBegin;
16895fce210SBarry Smith   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
16962152dedSBarry Smith   if (iascii) {ierr = PetscViewerASCIIPrintf(viewer,"  MultiSF sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);}
17062152dedSBarry Smith  #if defined(PETSC_USE_SINGLE_LIBRARY)
17162152dedSBarry Smith   {
17262152dedSBarry Smith     PetscBool ibinary;
17362152dedSBarry Smith     ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&ibinary);CHKERRQ(ierr);
17462152dedSBarry Smith     if (ibinary) {ierr = PetscSFView_Basic_PatternAndSizes(sf,viewer);CHKERRQ(ierr);}
17562152dedSBarry Smith   }
17662152dedSBarry Smith  #endif
17795fce210SBarry Smith   PetscFunctionReturn(0);
17895fce210SBarry Smith }
17995fce210SBarry Smith 
180ad227feaSJunchao Zhang static PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
18195fce210SBarry Smith {
18295fce210SBarry Smith   PetscErrorCode    ierr;
183cd620004SJunchao Zhang   PetscSFLink       link = NULL;
18495fce210SBarry Smith 
18595fce210SBarry Smith   PetscFunctionBegin;
186*71438e86SJunchao Zhang   /* Create a communication link, which provides buffers, MPI requests etc (if MPI is used) */
187cd620004SJunchao Zhang   ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link);CHKERRQ(ierr);
188*71438e86SJunchao Zhang   /* Pack rootdata to rootbuf for remote communication */
189cd620004SJunchao Zhang   ierr = PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata);CHKERRQ(ierr);
190*71438e86SJunchao Zhang   /* Start communcation, e.g., post MPI_Isend */
191*71438e86SJunchao Zhang   ierr = PetscSFLinkStartCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
192*71438e86SJunchao Zhang   /* Do local scatter (i.e., self to self communication), which overlaps with the remote communication above */
193*71438e86SJunchao Zhang   ierr = PetscSFLinkScatterLocal(sf,link,PETSCSF_ROOT2LEAF,(void*)rootdata,leafdata,op);CHKERRQ(ierr);
19495fce210SBarry Smith   PetscFunctionReturn(0);
19595fce210SBarry Smith }
19695fce210SBarry Smith 
197ad227feaSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
19895fce210SBarry Smith {
19995fce210SBarry Smith   PetscErrorCode    ierr;
200cd620004SJunchao Zhang   PetscSFLink       link = NULL;
20195fce210SBarry Smith 
20295fce210SBarry Smith   PetscFunctionBegin;
203cd620004SJunchao Zhang   /* Retrieve the link used in XxxBegin() with root/leafdata as key */
204cd620004SJunchao Zhang   ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
205*71438e86SJunchao Zhang   /* Finish remote communication, e.g., post MPI_Waitall */
206*71438e86SJunchao Zhang   ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
207*71438e86SJunchao Zhang   /* Unpack data in leafbuf to leafdata for remote communication */
208cd620004SJunchao Zhang   ierr = PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafdata,op);CHKERRQ(ierr);
209*71438e86SJunchao Zhang   /* Recycle the link */
210cd620004SJunchao Zhang   ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr);
211cd620004SJunchao Zhang   PetscFunctionReturn(0);
212cd620004SJunchao Zhang }
213cd620004SJunchao Zhang 
214cd620004SJunchao Zhang /* Shared by ReduceBegin and FetchAndOpBegin */
215cd620004SJunchao Zhang PETSC_STATIC_INLINE PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op,PetscSFOperation sfop,PetscSFLink *out)
216cd620004SJunchao Zhang {
217cd620004SJunchao Zhang   PetscErrorCode    ierr;
218*71438e86SJunchao Zhang   PetscSFLink       link = NULL;
219cd620004SJunchao Zhang 
220cd620004SJunchao Zhang   PetscFunctionBegin;
221cd620004SJunchao Zhang   ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,sfop,&link);CHKERRQ(ierr);
222cd620004SJunchao Zhang   ierr = PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata);CHKERRQ(ierr);
223*71438e86SJunchao Zhang   ierr = PetscSFLinkStartCommunication(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr);
224cd620004SJunchao Zhang   *out = link;
22595fce210SBarry Smith   PetscFunctionReturn(0);
22695fce210SBarry Smith }
22795fce210SBarry Smith 
22895fce210SBarry Smith /* leaf -> root with reduction */
229eb02082bSJunchao Zhang static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
23095fce210SBarry Smith {
23195fce210SBarry Smith   PetscErrorCode    ierr;
232cd620004SJunchao Zhang   PetscSFLink       link = NULL;
23395fce210SBarry Smith 
23495fce210SBarry Smith   PetscFunctionBegin;
235cd620004SJunchao Zhang   ierr = PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_REDUCE,&link);CHKERRQ(ierr);
236*71438e86SJunchao Zhang   ierr = PetscSFLinkScatterLocal(sf,link,PETSCSF_LEAF2ROOT,rootdata,(void*)leafdata,op);CHKERRQ(ierr);
23795fce210SBarry Smith   PetscFunctionReturn(0);
23895fce210SBarry Smith }
23995fce210SBarry Smith 
24000816365SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
24195fce210SBarry Smith {
24295fce210SBarry Smith   PetscErrorCode    ierr;
243cd620004SJunchao Zhang   PetscSFLink       link = NULL;
24495fce210SBarry Smith 
24595fce210SBarry Smith   PetscFunctionBegin;
246cd620004SJunchao Zhang   ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
247*71438e86SJunchao Zhang   ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr);
248cd620004SJunchao Zhang   ierr = PetscSFLinkUnpackRootData(sf,link,PETSCSF_REMOTE,rootdata,op);CHKERRQ(ierr);
249cd620004SJunchao Zhang   ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr);
25095fce210SBarry Smith   PetscFunctionReturn(0);
25195fce210SBarry Smith }
25295fce210SBarry Smith 
253eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
25495fce210SBarry Smith {
25595fce210SBarry Smith   PetscErrorCode    ierr;
256cd620004SJunchao Zhang   PetscSFLink       link = NULL;
25795fce210SBarry Smith 
25895fce210SBarry Smith   PetscFunctionBegin;
259cd620004SJunchao Zhang   ierr = PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_FETCH,&link);CHKERRQ(ierr);
260cd620004SJunchao Zhang   ierr = PetscSFLinkFetchAndOpLocal(sf,link,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
26195fce210SBarry Smith   PetscFunctionReturn(0);
26295fce210SBarry Smith }
26395fce210SBarry Smith 
26400816365SJunchao Zhang static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
26595fce210SBarry Smith {
26695fce210SBarry Smith   PetscErrorCode    ierr;
267cd620004SJunchao Zhang   PetscSFLink       link = NULL;
26895fce210SBarry Smith 
26995fce210SBarry Smith   PetscFunctionBegin;
270cd620004SJunchao Zhang   ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
27195fce210SBarry Smith   /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
272*71438e86SJunchao Zhang   ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr);
273cd620004SJunchao Zhang   /* Do fetch-and-op, the (remote) update results are in rootbuf */
274*71438e86SJunchao Zhang   ierr = PetscSFLinkFetchAndOpRemote(sf,link,rootdata,op);CHKERRQ(ierr);
275cd620004SJunchao Zhang   /* Bcast rootbuf to leafupdate */
276*71438e86SJunchao Zhang   ierr = PetscSFLinkStartCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
277*71438e86SJunchao Zhang   ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
278b23bfdefSJunchao Zhang   /* Unpack and insert fetched data into leaves */
27983df288dSJunchao Zhang   ierr = PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafupdate,MPI_REPLACE);CHKERRQ(ierr);
280cd620004SJunchao Zhang   ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr);
28195fce210SBarry Smith   PetscFunctionReturn(0);
28295fce210SBarry Smith }
28395fce210SBarry Smith 
28440e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
2858750ddebSJunchao Zhang {
2868750ddebSJunchao Zhang   PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
2878750ddebSJunchao Zhang 
2888750ddebSJunchao Zhang   PetscFunctionBegin;
2898750ddebSJunchao Zhang   if (niranks)  *niranks  = bas->niranks;
2908750ddebSJunchao Zhang   if (iranks)   *iranks   = bas->iranks;
2918750ddebSJunchao Zhang   if (ioffset)  *ioffset  = bas->ioffset;
2928750ddebSJunchao Zhang   if (irootloc) *irootloc = bas->irootloc;
2938750ddebSJunchao Zhang   PetscFunctionReturn(0);
2948750ddebSJunchao Zhang }
2958750ddebSJunchao Zhang 
29672502a1fSJunchao Zhang /* An optimized PetscSFCreateEmbeddedRootSF. We aggresively make use of the established communication on sf.
297f659e5c7SJunchao Zhang    We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[]
298f659e5c7SJunchao Zhang    was sorted before calling the routine.
299f659e5c7SJunchao Zhang  */
30072502a1fSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedRootSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
301f659e5c7SJunchao Zhang {
302f659e5c7SJunchao Zhang   PetscSF           esf;
303cd620004SJunchao Zhang   PetscInt          esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote;
304cd620004SJunchao Zhang   PetscInt          i,j,p,q,nroots,esf_nleaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal;
305cd620004SJunchao Zhang   char              *rootdata,*leafdata,*leafmem; /* Only stores 0 or 1, so we can save memory with char */
306f659e5c7SJunchao Zhang   PetscMPIInt       *esf_ranks;
307f659e5c7SJunchao Zhang   const PetscMPIInt *ranks,*iranks;
308cd620004SJunchao Zhang   const PetscInt    *roffset,*rmine,*rremote,*ioffset,*irootloc;
309f659e5c7SJunchao Zhang   PetscBool         connected;
310f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
311f659e5c7SJunchao Zhang   PetscSF_Basic     *bas;
312f659e5c7SJunchao Zhang   PetscErrorCode    ierr;
313f659e5c7SJunchao Zhang 
314f659e5c7SJunchao Zhang   PetscFunctionBegin;
315f659e5c7SJunchao Zhang   ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr);
31620c24465SJunchao Zhang   ierr = PetscSFSetFromOptions(esf);CHKERRQ(ierr);
317f659e5c7SJunchao Zhang   ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */
318f659e5c7SJunchao Zhang 
319cd620004SJunchao Zhang   /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */
320f659e5c7SJunchao Zhang   ierr = PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);CHKERRQ(ierr);
321f659e5c7SJunchao Zhang   ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
322cd620004SJunchao Zhang   maxlocal = maxleaf - minleaf + 1;
323cd620004SJunchao Zhang   ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr);
324cd620004SJunchao Zhang   leafdata = leafmem - minleaf;
325f659e5c7SJunchao Zhang   /* Tag selected roots */
326f659e5c7SJunchao Zhang   for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1;
327f659e5c7SJunchao Zhang 
328ad227feaSJunchao Zhang   ierr = PetscSFBcastBegin(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
329ad227feaSJunchao Zhang   ierr = PetscSFBcastEnd(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
330f659e5c7SJunchao Zhang   ierr = PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr); /* Get send info */
331cd620004SJunchao Zhang   esf_nranks = esf_ndranks = esf_nleaves = 0;
332b23bfdefSJunchao Zhang   for (i=0; i<nranks; i++) {
333cd620004SJunchao Zhang     connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */
334cd620004SJunchao Zhang     for (j=roffset[i]; j<roffset[i+1]; j++) {if (leafdata[rmine[j]]) {esf_nleaves++; connected = PETSC_TRUE;}}
335f659e5c7SJunchao Zhang     if (connected) {esf_nranks++; if (i < ndranks) esf_ndranks++;}
336f659e5c7SJunchao Zhang   }
337f659e5c7SJunchao Zhang 
338f659e5c7SJunchao Zhang   /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
339cd620004SJunchao Zhang   ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr);
340cd620004SJunchao Zhang   ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr);
341cd620004SJunchao Zhang   ierr = PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,esf_nleaves,&esf_rmine,esf_nleaves,&esf_rremote);CHKERRQ(ierr);
342f659e5c7SJunchao Zhang   p    = 0; /* Counter for connected root ranks */
343f659e5c7SJunchao Zhang   q    = 0; /* Counter for connected leaves */
344f659e5c7SJunchao Zhang   esf_roffset[0] = 0;
345f659e5c7SJunchao Zhang   for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */
346f659e5c7SJunchao Zhang     connected = PETSC_FALSE;
347cd620004SJunchao Zhang     for (j=roffset[i]; j<roffset[i+1]; j++) {
348cd620004SJunchao Zhang       if (leafdata[rmine[j]]) {
349f659e5c7SJunchao Zhang         esf_rmine[q]         = new_ilocal[q] = rmine[j];
350f659e5c7SJunchao Zhang         esf_rremote[q]       = rremote[j];
351f659e5c7SJunchao Zhang         new_iremote[q].index = rremote[j];
352f659e5c7SJunchao Zhang         new_iremote[q].rank  = ranks[i];
353f659e5c7SJunchao Zhang         connected            = PETSC_TRUE;
354f659e5c7SJunchao Zhang         q++;
355f659e5c7SJunchao Zhang       }
356f659e5c7SJunchao Zhang     }
357f659e5c7SJunchao Zhang     if (connected) {
358f659e5c7SJunchao Zhang       esf_ranks[p]     = ranks[i];
359f659e5c7SJunchao Zhang       esf_roffset[p+1] = q;
360f659e5c7SJunchao Zhang       p++;
361f659e5c7SJunchao Zhang     }
362f659e5c7SJunchao Zhang   }
363f659e5c7SJunchao Zhang 
364f659e5c7SJunchao Zhang   /* SetGraph internally resets the SF, so we only set its fields after the call */
365cd620004SJunchao Zhang   ierr           = PetscSFSetGraph(esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
366f659e5c7SJunchao Zhang   esf->nranks    = esf_nranks;
367f659e5c7SJunchao Zhang   esf->ndranks   = esf_ndranks;
368f659e5c7SJunchao Zhang   esf->ranks     = esf_ranks;
369f659e5c7SJunchao Zhang   esf->roffset   = esf_roffset;
370f659e5c7SJunchao Zhang   esf->rmine     = esf_rmine;
371f659e5c7SJunchao Zhang   esf->rremote   = esf_rremote;
372cd620004SJunchao Zhang   esf->nleafreqs = esf_nranks - esf_ndranks;
373f659e5c7SJunchao Zhang 
374f659e5c7SJunchao Zhang   /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
375f659e5c7SJunchao Zhang   bas  = (PetscSF_Basic*)esf->data;
376f659e5c7SJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr); /* Get recv info */
377f659e5c7SJunchao Zhang   /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we
378cd620004SJunchao Zhang      we do not care since these arrays are usually short. The benefit is we can fill these arrays by just parsing irootloc once.
379f659e5c7SJunchao Zhang    */
380f659e5c7SJunchao Zhang   ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr);
381f659e5c7SJunchao Zhang   ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr);
382f659e5c7SJunchao Zhang   bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
383f659e5c7SJunchao Zhang   p = 0; /* Counter for connected leaf ranks */
384f659e5c7SJunchao Zhang   q = 0; /* Counter for connected roots */
385f659e5c7SJunchao Zhang   for (i=0; i<niranks; i++) {
386f659e5c7SJunchao Zhang     connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
387f659e5c7SJunchao Zhang     for (j=ioffset[i]; j<ioffset[i+1]; j++) {
388cd620004SJunchao Zhang       if (rootdata[irootloc[j]]) {
389f659e5c7SJunchao Zhang         bas->irootloc[q++] = irootloc[j];
390f659e5c7SJunchao Zhang         connected = PETSC_TRUE;
391f659e5c7SJunchao Zhang       }
392f659e5c7SJunchao Zhang     }
393f659e5c7SJunchao Zhang     if (connected) {
394f659e5c7SJunchao Zhang       bas->niranks++;
395f659e5c7SJunchao Zhang       if (i<ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */
396f659e5c7SJunchao Zhang       bas->iranks[p]    = iranks[i];
397f659e5c7SJunchao Zhang       bas->ioffset[p+1] = q;
398f659e5c7SJunchao Zhang       p++;
399f659e5c7SJunchao Zhang     }
400f659e5c7SJunchao Zhang   }
401f659e5c7SJunchao Zhang   bas->itotal     = q;
402cd620004SJunchao Zhang   bas->nrootreqs  = bas->niranks - bas->ndiranks;
403cd620004SJunchao Zhang   esf->persistent = PETSC_TRUE;
404cd620004SJunchao Zhang   /* Setup packing related fields */
405cd620004SJunchao Zhang   ierr = PetscSFSetUpPackFields(esf);CHKERRQ(ierr);
406f659e5c7SJunchao Zhang 
40720c24465SJunchao Zhang   /* Copy from PetscSFSetUp(), since this method wants to skip PetscSFSetUp(). */
40820c24465SJunchao Zhang #if defined(PETSC_HAVE_CUDA)
40920c24465SJunchao Zhang   if (esf->backend == PETSCSF_BACKEND_CUDA) {
410*71438e86SJunchao Zhang     esf->ops->Malloc = PetscSFMalloc_CUDA;
411*71438e86SJunchao Zhang     esf->ops->Free   = PetscSFFree_CUDA;
41220c24465SJunchao Zhang   }
41320c24465SJunchao Zhang #endif
41420c24465SJunchao Zhang 
41559af0bd3SScott Kruger #if defined(PETSC_HAVE_HIP)
41659af0bd3SScott Kruger   /* TODO: Needs debugging */
41759af0bd3SScott Kruger   if (esf->backend == PETSCSF_BACKEND_HIP) {
41859af0bd3SScott Kruger     esf->ops->Malloc = PetscSFMalloc_HIP;
41959af0bd3SScott Kruger     esf->ops->Free   = PetscSFFree_HIP;
42059af0bd3SScott Kruger   }
42159af0bd3SScott Kruger #endif
42259af0bd3SScott Kruger 
42320c24465SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS)
42420c24465SJunchao Zhang   if (esf->backend == PETSCSF_BACKEND_KOKKOS) {
42520c24465SJunchao Zhang     esf->ops->Malloc = PetscSFMalloc_Kokkos;
42620c24465SJunchao Zhang     esf->ops->Free   = PetscSFFree_Kokkos;
42720c24465SJunchao Zhang   }
42820c24465SJunchao Zhang #endif
429f659e5c7SJunchao Zhang   esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
430cd620004SJunchao Zhang   ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr);
431f659e5c7SJunchao Zhang   *newsf = esf;
432f659e5c7SJunchao Zhang   PetscFunctionReturn(0);
433f659e5c7SJunchao Zhang }
434f659e5c7SJunchao Zhang 
4358cc058d9SJed Brown PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
43695fce210SBarry Smith {
43740e23c03SJunchao Zhang   PetscSF_Basic  *dat;
43895fce210SBarry Smith   PetscErrorCode ierr;
43995fce210SBarry Smith 
44095fce210SBarry Smith   PetscFunctionBegin;
44195fce210SBarry Smith   sf->ops->SetUp                = PetscSFSetUp_Basic;
44295fce210SBarry Smith   sf->ops->Reset                = PetscSFReset_Basic;
44395fce210SBarry Smith   sf->ops->Destroy              = PetscSFDestroy_Basic;
44495fce210SBarry Smith   sf->ops->View                 = PetscSFView_Basic;
445ad227feaSJunchao Zhang   sf->ops->BcastBegin           = PetscSFBcastBegin_Basic;
446ad227feaSJunchao Zhang   sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
44795fce210SBarry Smith   sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
44895fce210SBarry Smith   sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
44995fce210SBarry Smith   sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
45095fce210SBarry Smith   sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
4518750ddebSJunchao Zhang   sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
45272502a1fSJunchao Zhang   sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;
45395fce210SBarry Smith 
45440e23c03SJunchao Zhang   ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr);
45540e23c03SJunchao Zhang   sf->data = (void*)dat;
45695fce210SBarry Smith   PetscFunctionReturn(0);
45795fce210SBarry Smith }
458