xref: /petsc/src/vec/is/sf/impls/basic/sfbasic.c (revision ad227fea26420e7a31ef041a9fa1418814d171a8)
120c24465SJunchao Zhang #include "petscsf.h"
240e23c03SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfbasic.h>
3cd620004SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfpack.h>
4b23bfdefSJunchao Zhang 
540e23c03SJunchao Zhang /*===================================================================================*/
640e23c03SJunchao Zhang /*              SF public interface implementations                                  */
740e23c03SJunchao Zhang /*===================================================================================*/
840e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
995fce210SBarry Smith {
1095fce210SBarry Smith   PetscErrorCode ierr;
11b23bfdefSJunchao Zhang   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
1295fce210SBarry Smith   PetscInt       *rlengths,*ilengths,i;
1340e23c03SJunchao Zhang   PetscMPIInt    rank,niranks,*iranks,tag;
1495fce210SBarry Smith   MPI_Comm       comm;
15b5a8e515SJed Brown   MPI_Group      group;
1640e23c03SJunchao Zhang   MPI_Request    *rootreqs,*leafreqs;
1795fce210SBarry Smith 
1895fce210SBarry Smith   PetscFunctionBegin;
19ffc4695bSBarry Smith   ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRMPI(ierr);
20b5a8e515SJed Brown   ierr = PetscSFSetUpRanks(sf,group);CHKERRQ(ierr);
21ffc4695bSBarry Smith   ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
2295fce210SBarry Smith   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
2340e23c03SJunchao Zhang   ierr = PetscObjectGetNewTag((PetscObject)sf,&tag);CHKERRQ(ierr);
24ffc4695bSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
2595fce210SBarry Smith   /*
2695fce210SBarry Smith    * Inform roots about how many leaves and from which ranks
2795fce210SBarry Smith    */
28785e854fSJed Brown   ierr = PetscMalloc1(sf->nranks,&rlengths);CHKERRQ(ierr);
29cd620004SJunchao Zhang   /* Determine number, sending ranks and length of incoming */
3095fce210SBarry Smith   for (i=0; i<sf->nranks; i++) {
3195fce210SBarry Smith     rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */
3295fce210SBarry Smith   }
3340e23c03SJunchao Zhang   ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);CHKERRQ(ierr);
34c943f53fSJed Brown 
350b899082SJunchao Zhang   /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why.
360b899082SJunchao Zhang      We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is
370b899082SJunchao Zhang      small and the sorting is cheap.
380b899082SJunchao Zhang    */
390b899082SJunchao Zhang   ierr = PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);CHKERRQ(ierr);
400b899082SJunchao Zhang 
41c943f53fSJed Brown   /* Partition into distinguished and non-distinguished incoming ranks */
42c943f53fSJed Brown   bas->ndiranks = sf->ndranks;
43c943f53fSJed Brown   bas->niranks = bas->ndiranks + niranks;
44c943f53fSJed Brown   ierr = PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);CHKERRQ(ierr);
45c943f53fSJed Brown   bas->ioffset[0] = 0;
46c943f53fSJed Brown   for (i=0; i<bas->ndiranks; i++) {
47c943f53fSJed Brown     bas->iranks[i] = sf->ranks[i];
48c943f53fSJed Brown     bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i];
49c943f53fSJed Brown   }
5040e23c03SJunchao Zhang   if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks");
5140e23c03SJunchao Zhang   for (; i<bas->niranks; i++) {
52c943f53fSJed Brown     bas->iranks[i] = iranks[i-bas->ndiranks];
53c943f53fSJed Brown     bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks];
54c943f53fSJed Brown   }
55c943f53fSJed Brown   bas->itotal = bas->ioffset[i];
5695fce210SBarry Smith   ierr = PetscFree(rlengths);CHKERRQ(ierr);
57c943f53fSJed Brown   ierr = PetscFree(iranks);CHKERRQ(ierr);
58c943f53fSJed Brown   ierr = PetscFree(ilengths);CHKERRQ(ierr);
5995fce210SBarry Smith 
6095fce210SBarry Smith   /* Send leaf identities to roots */
61c943f53fSJed Brown   ierr = PetscMalloc1(bas->itotal,&bas->irootloc);CHKERRQ(ierr);
6240e23c03SJunchao Zhang   ierr = PetscMalloc2(bas->niranks-bas->ndiranks,&rootreqs,sf->nranks-sf->ndranks,&leafreqs);CHKERRQ(ierr);
6340e23c03SJunchao Zhang   for (i=bas->ndiranks; i<bas->niranks; i++) {
64ffc4695bSBarry Smith     ierr = MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);CHKERRMPI(ierr);
6540e23c03SJunchao Zhang   }
6640e23c03SJunchao Zhang   for (i=0; i<sf->nranks; i++) {
6795fce210SBarry Smith     PetscMPIInt npoints;
6895fce210SBarry Smith     ierr = PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);CHKERRQ(ierr);
6940e23c03SJunchao Zhang     if (i < sf->ndranks) {
7040e23c03SJunchao Zhang       if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank");
7140e23c03SJunchao Zhang       if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank");
7240e23c03SJunchao Zhang       if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths");
7340e23c03SJunchao Zhang       ierr = PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);CHKERRQ(ierr);
74c943f53fSJed Brown       continue;
75c943f53fSJed Brown     }
76ffc4695bSBarry Smith     ierr = MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);CHKERRMPI(ierr);
77bf39f1bfSJed Brown   }
78ffc4695bSBarry Smith   ierr = MPI_Waitall(bas->niranks-bas->ndiranks,rootreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
79ffc4695bSBarry Smith   ierr = MPI_Waitall(sf->nranks-sf->ndranks,leafreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
8040e23c03SJunchao Zhang   ierr = PetscFree2(rootreqs,leafreqs);CHKERRQ(ierr);
8195fce210SBarry Smith 
82cd620004SJunchao Zhang   sf->nleafreqs  = sf->nranks - sf->ndranks;
83cd620004SJunchao Zhang   bas->nrootreqs = bas->niranks - bas->ndiranks;
84cd620004SJunchao Zhang   sf->persistent = PETSC_TRUE;
85eb02082bSJunchao Zhang 
86cd620004SJunchao Zhang   /* Setup fields related to packing */
87cd620004SJunchao Zhang   ierr = PetscSFSetUpPackFields(sf);CHKERRQ(ierr);
8895fce210SBarry Smith   PetscFunctionReturn(0);
8995fce210SBarry Smith }
9095fce210SBarry Smith 
9140e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
9295fce210SBarry Smith {
9395fce210SBarry Smith   PetscErrorCode    ierr;
94cd620004SJunchao Zhang   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
9595fce210SBarry Smith 
9695fce210SBarry Smith   PetscFunctionBegin;
9729046d53SLisandro Dalcin   if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
98c943f53fSJed Brown   ierr = PetscFree2(bas->iranks,bas->ioffset);CHKERRQ(ierr);
99c943f53fSJed Brown   ierr = PetscFree(bas->irootloc);CHKERRQ(ierr);
1007fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
10120c24465SJunchao Zhang   for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,bas->irootloc_d[i]);CHKERRQ(ierr);}
102eb02082bSJunchao Zhang #endif
103cd620004SJunchao Zhang   ierr = PetscSFLinkDestroy(sf,&bas->avail);CHKERRQ(ierr);
104cd620004SJunchao Zhang   ierr = PetscSFResetPackFields(sf);CHKERRQ(ierr);
10595fce210SBarry Smith   PetscFunctionReturn(0);
10695fce210SBarry Smith }
10795fce210SBarry Smith 
10840e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
10995fce210SBarry Smith {
11095fce210SBarry Smith   PetscErrorCode ierr;
11195fce210SBarry Smith 
11295fce210SBarry Smith   PetscFunctionBegin;
113f6d956f6SStefano Zampini   ierr = PetscSFReset_Basic(sf);CHKERRQ(ierr);
11495fce210SBarry Smith   ierr = PetscFree(sf->data);CHKERRQ(ierr);
11595fce210SBarry Smith   PetscFunctionReturn(0);
11695fce210SBarry Smith }
11795fce210SBarry Smith 
11862152dedSBarry Smith #if defined(PETSC_USE_SINGLE_LIBRARY)
11962152dedSBarry Smith #include <petscmat.h>
12062152dedSBarry Smith 
12162152dedSBarry Smith PETSC_INTERN PetscErrorCode PetscSFView_Basic_PatternAndSizes(PetscSF sf,PetscViewer viewer)
12262152dedSBarry Smith {
12362152dedSBarry Smith   PetscErrorCode       ierr;
12462152dedSBarry Smith   PetscSF_Basic        *bas = (PetscSF_Basic*)sf->data;
12562152dedSBarry Smith   PetscSFLink          link = bas->avail;
12662152dedSBarry Smith   PetscInt             i,nrootranks,ndrootranks,myrank;
12762152dedSBarry Smith   const PetscInt       *rootoffset;
12862152dedSBarry Smith   PetscMPIInt          rank,size;
12962152dedSBarry Smith   MPI_Comm             comm = PetscObjectComm((PetscObject)sf);
13062152dedSBarry Smith   Mat                  A;
13162152dedSBarry Smith 
13262152dedSBarry Smith   PetscFunctionBegin;
13362152dedSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
13462152dedSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
13562152dedSBarry Smith   myrank = rank;
13662152dedSBarry Smith   if (sf->persistent) {
13762152dedSBarry Smith     /* amount of data I send to other ranks - global to local */
13862152dedSBarry Smith     ierr = MatCreateAIJ(comm,1,1,size,size,20,NULL,20,NULL,&A);CHKERRQ(ierr);
13962152dedSBarry Smith     ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
14062152dedSBarry Smith     for (i=0; i<nrootranks; i++) {
14162152dedSBarry Smith       ierr = MatSetValue(A,myrank,bas->iranks[i],(rootoffset[i+1] - rootoffset[i])*link->unitbytes,INSERT_VALUES);CHKERRQ(ierr);
14262152dedSBarry Smith     }
14362152dedSBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
14462152dedSBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
14562152dedSBarry Smith     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
14662152dedSBarry Smith     ierr = MatView(A,viewer);CHKERRQ(ierr);
14762152dedSBarry Smith     ierr = MatDestroy(&A);CHKERRQ(ierr);
14862152dedSBarry Smith   }
14962152dedSBarry Smith   PetscFunctionReturn(0);
15062152dedSBarry Smith }
15162152dedSBarry Smith #endif
15262152dedSBarry Smith 
15340e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer)
15495fce210SBarry Smith {
15595fce210SBarry Smith   PetscErrorCode ierr;
15695fce210SBarry Smith   PetscBool      iascii;
15795fce210SBarry Smith 
15895fce210SBarry Smith   PetscFunctionBegin;
15995fce210SBarry Smith   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
16062152dedSBarry Smith   if (iascii) {ierr = PetscViewerASCIIPrintf(viewer,"  MultiSF sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);}
16162152dedSBarry Smith  #if defined(PETSC_USE_SINGLE_LIBRARY)
16262152dedSBarry Smith   {
16362152dedSBarry Smith     PetscBool ibinary;
16462152dedSBarry Smith     ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&ibinary);CHKERRQ(ierr);
16562152dedSBarry Smith     if (ibinary) {ierr = PetscSFView_Basic_PatternAndSizes(sf,viewer);CHKERRQ(ierr);}
16662152dedSBarry Smith   }
16762152dedSBarry Smith  #endif
16895fce210SBarry Smith   PetscFunctionReturn(0);
16995fce210SBarry Smith }
17095fce210SBarry Smith 
171*ad227feaSJunchao Zhang static PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
17295fce210SBarry Smith {
17395fce210SBarry Smith   PetscErrorCode    ierr;
174cd620004SJunchao Zhang   PetscSFLink       link = NULL;
175851d6770SJunchao Zhang   MPI_Request       *rootreqs = NULL,*leafreqs = NULL;
176cd620004SJunchao Zhang   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
17795fce210SBarry Smith 
17895fce210SBarry Smith   PetscFunctionBegin;
179cd620004SJunchao Zhang   /* Create a communication link, which provides buffers & MPI requests etc */
180cd620004SJunchao Zhang   ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link);CHKERRQ(ierr);
181cd620004SJunchao Zhang   /* Get MPI requests from the link. We do not need buffers explicitly since we use persistent MPI */
182cd620004SJunchao Zhang   ierr = PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,NULL,NULL,&rootreqs,&leafreqs);CHKERRQ(ierr);
183cd620004SJunchao Zhang   /* Post Irecv for remote */
184ffc4695bSBarry Smith   ierr = MPI_Startall_irecv(sf->leafbuflen[PETSCSF_REMOTE],unit,sf->nleafreqs,leafreqs);CHKERRMPI(ierr);
185cd620004SJunchao Zhang   /* Pack rootdata and do Isend for remote */
186cd620004SJunchao Zhang   ierr = PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata);CHKERRQ(ierr);
187ffc4695bSBarry Smith   ierr = MPI_Startall_isend(bas->rootbuflen[PETSCSF_REMOTE],unit,bas->nrootreqs,rootreqs);CHKERRMPI(ierr);
188cd620004SJunchao Zhang   /* Do local BcastAndOp, which overlaps with the irecv/isend above */
189cd620004SJunchao Zhang   ierr = PetscSFLinkBcastAndOpLocal(sf,link,rootdata,leafdata,op);CHKERRQ(ierr);
19095fce210SBarry Smith   PetscFunctionReturn(0);
19195fce210SBarry Smith }
19295fce210SBarry Smith 
193*ad227feaSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
19495fce210SBarry Smith {
19595fce210SBarry Smith   PetscErrorCode    ierr;
196cd620004SJunchao Zhang   PetscSFLink       link = NULL;
19795fce210SBarry Smith 
19895fce210SBarry Smith   PetscFunctionBegin;
199cd620004SJunchao Zhang   /* Retrieve the link used in XxxBegin() with root/leafdata as key */
200cd620004SJunchao Zhang   ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
201cd620004SJunchao Zhang   /* Wait for the completion of mpi */
202cd620004SJunchao Zhang   ierr = PetscSFLinkMPIWaitall(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
203cd620004SJunchao Zhang   /* Unpack leafdata and reclaim the link */
204cd620004SJunchao Zhang   ierr = PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafdata,op);CHKERRQ(ierr);
205cd620004SJunchao Zhang   ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr);
206cd620004SJunchao Zhang   PetscFunctionReturn(0);
207cd620004SJunchao Zhang }
208cd620004SJunchao Zhang 
209cd620004SJunchao Zhang /* Shared by ReduceBegin and FetchAndOpBegin */
210cd620004SJunchao Zhang PETSC_STATIC_INLINE PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op,PetscSFOperation sfop,PetscSFLink *out)
211cd620004SJunchao Zhang {
212cd620004SJunchao Zhang   PetscErrorCode    ierr;
213cd620004SJunchao Zhang   PetscSFLink       link;
214cd620004SJunchao Zhang   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
215cd620004SJunchao Zhang   MPI_Request       *rootreqs = NULL,*leafreqs = NULL;
216cd620004SJunchao Zhang 
217cd620004SJunchao Zhang   PetscFunctionBegin;
218cd620004SJunchao Zhang   ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,sfop,&link);CHKERRQ(ierr);
219cd620004SJunchao Zhang   ierr = PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_LEAF2ROOT,NULL,NULL,&rootreqs,&leafreqs);CHKERRQ(ierr);
220ffc4695bSBarry Smith   ierr = MPI_Startall_irecv(bas->rootbuflen[PETSCSF_REMOTE],unit,bas->nrootreqs,rootreqs);CHKERRMPI(ierr);
221cd620004SJunchao Zhang   ierr = PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata);CHKERRQ(ierr);
222ffc4695bSBarry Smith   ierr = MPI_Startall_isend(sf->leafbuflen[PETSCSF_REMOTE],unit,sf->nleafreqs,leafreqs);CHKERRMPI(ierr);
223cd620004SJunchao Zhang   *out = link;
22495fce210SBarry Smith   PetscFunctionReturn(0);
22595fce210SBarry Smith }
22695fce210SBarry Smith 
22795fce210SBarry Smith /* leaf -> root with reduction */
228eb02082bSJunchao Zhang static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
22995fce210SBarry Smith {
23095fce210SBarry Smith   PetscErrorCode    ierr;
231cd620004SJunchao Zhang   PetscSFLink       link = NULL;
23295fce210SBarry Smith 
23395fce210SBarry Smith   PetscFunctionBegin;
234cd620004SJunchao Zhang   ierr = PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_REDUCE,&link);CHKERRQ(ierr);
235cd620004SJunchao Zhang   ierr = PetscSFLinkReduceLocal(sf,link,leafdata,rootdata,op);CHKERRQ(ierr);
23695fce210SBarry Smith   PetscFunctionReturn(0);
23795fce210SBarry Smith }
23895fce210SBarry Smith 
23900816365SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
24095fce210SBarry Smith {
24195fce210SBarry Smith   PetscErrorCode    ierr;
242cd620004SJunchao Zhang   PetscSFLink       link = NULL;
24395fce210SBarry Smith 
24495fce210SBarry Smith   PetscFunctionBegin;
245cd620004SJunchao Zhang   ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
246cd620004SJunchao Zhang   ierr = PetscSFLinkMPIWaitall(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr);
247cd620004SJunchao Zhang   ierr = PetscSFLinkUnpackRootData(sf,link,PETSCSF_REMOTE,rootdata,op);CHKERRQ(ierr);
248cd620004SJunchao Zhang   ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr);
24995fce210SBarry Smith   PetscFunctionReturn(0);
25095fce210SBarry Smith }
25195fce210SBarry Smith 
252eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
25395fce210SBarry Smith {
25495fce210SBarry Smith   PetscErrorCode    ierr;
255cd620004SJunchao Zhang   PetscSFLink       link = NULL;
25695fce210SBarry Smith 
25795fce210SBarry Smith   PetscFunctionBegin;
258cd620004SJunchao Zhang   ierr = PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_FETCH,&link);CHKERRQ(ierr);
259cd620004SJunchao Zhang   ierr = PetscSFLinkFetchAndOpLocal(sf,link,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
26095fce210SBarry Smith   PetscFunctionReturn(0);
26195fce210SBarry Smith }
26295fce210SBarry Smith 
26300816365SJunchao Zhang static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
26495fce210SBarry Smith {
26595fce210SBarry Smith   PetscErrorCode    ierr;
266cd620004SJunchao Zhang   PetscSFLink       link = NULL;
267851d6770SJunchao Zhang   MPI_Request       *rootreqs = NULL,*leafreqs = NULL;
268cd620004SJunchao Zhang   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
26995fce210SBarry Smith 
27095fce210SBarry Smith   PetscFunctionBegin;
271cd620004SJunchao Zhang   ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
27295fce210SBarry Smith   /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
273cd620004SJunchao Zhang   ierr = PetscSFLinkMPIWaitall(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr);
274cd620004SJunchao Zhang   /* Do fetch-and-op, the (remote) update results are in rootbuf */
275cd620004SJunchao Zhang   ierr = PetscSFLinkFetchRootData(sf,link,PETSCSF_REMOTE,rootdata,op);CHKERRQ(ierr);
27640e23c03SJunchao Zhang 
277cd620004SJunchao Zhang   /* Bcast rootbuf to leafupdate */
278cd620004SJunchao Zhang   ierr = PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,NULL,NULL,&rootreqs,&leafreqs);CHKERRQ(ierr);
279cd620004SJunchao Zhang   /* Post leaf receives and root sends */
280ffc4695bSBarry Smith   ierr = MPI_Startall_irecv(sf->leafbuflen[PETSCSF_REMOTE],unit,sf->nleafreqs,leafreqs);CHKERRMPI(ierr);
281ffc4695bSBarry Smith   ierr = MPI_Startall_isend(bas->rootbuflen[PETSCSF_REMOTE],unit,bas->nrootreqs,rootreqs);CHKERRMPI(ierr);
282b23bfdefSJunchao Zhang   /* Unpack and insert fetched data into leaves */
283cd620004SJunchao Zhang   ierr = PetscSFLinkMPIWaitall(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
28483df288dSJunchao Zhang   ierr = PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafupdate,MPI_REPLACE);CHKERRQ(ierr);
285cd620004SJunchao Zhang   ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr);
28695fce210SBarry Smith   PetscFunctionReturn(0);
28795fce210SBarry Smith }
28895fce210SBarry Smith 
28940e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
2908750ddebSJunchao Zhang {
2918750ddebSJunchao Zhang   PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
2928750ddebSJunchao Zhang 
2938750ddebSJunchao Zhang   PetscFunctionBegin;
2948750ddebSJunchao Zhang   if (niranks)  *niranks  = bas->niranks;
2958750ddebSJunchao Zhang   if (iranks)   *iranks   = bas->iranks;
2968750ddebSJunchao Zhang   if (ioffset)  *ioffset  = bas->ioffset;
2978750ddebSJunchao Zhang   if (irootloc) *irootloc = bas->irootloc;
2988750ddebSJunchao Zhang   PetscFunctionReturn(0);
2998750ddebSJunchao Zhang }
3008750ddebSJunchao Zhang 
301f659e5c7SJunchao Zhang /* An optimized PetscSFCreateEmbeddedSF. We aggresively make use of the established communication on sf.
302f659e5c7SJunchao Zhang    We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[]
303f659e5c7SJunchao Zhang    was sorted before calling the routine.
304f659e5c7SJunchao Zhang  */
305f659e5c7SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
306f659e5c7SJunchao Zhang {
307f659e5c7SJunchao Zhang   PetscSF           esf;
308cd620004SJunchao Zhang   PetscInt          esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote;
309cd620004SJunchao Zhang   PetscInt          i,j,p,q,nroots,esf_nleaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal;
310cd620004SJunchao Zhang   char              *rootdata,*leafdata,*leafmem; /* Only stores 0 or 1, so we can save memory with char */
311f659e5c7SJunchao Zhang   PetscMPIInt       *esf_ranks;
312f659e5c7SJunchao Zhang   const PetscMPIInt *ranks,*iranks;
313cd620004SJunchao Zhang   const PetscInt    *roffset,*rmine,*rremote,*ioffset,*irootloc;
314f659e5c7SJunchao Zhang   PetscBool         connected;
315f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
316f659e5c7SJunchao Zhang   PetscSF_Basic     *bas;
317f659e5c7SJunchao Zhang   PetscErrorCode    ierr;
318f659e5c7SJunchao Zhang 
319f659e5c7SJunchao Zhang   PetscFunctionBegin;
320f659e5c7SJunchao Zhang   ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr);
32120c24465SJunchao Zhang   ierr = PetscSFSetFromOptions(esf);CHKERRQ(ierr);
322f659e5c7SJunchao Zhang   ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */
323f659e5c7SJunchao Zhang 
324cd620004SJunchao Zhang   /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */
325f659e5c7SJunchao Zhang   ierr = PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);CHKERRQ(ierr);
326f659e5c7SJunchao Zhang   ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
327cd620004SJunchao Zhang   maxlocal = maxleaf - minleaf + 1;
328cd620004SJunchao Zhang   ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr);
329cd620004SJunchao Zhang   leafdata = leafmem - minleaf;
330f659e5c7SJunchao Zhang   /* Tag selected roots */
331f659e5c7SJunchao Zhang   for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1;
332f659e5c7SJunchao Zhang 
333*ad227feaSJunchao Zhang   ierr = PetscSFBcastBegin(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
334*ad227feaSJunchao Zhang   ierr = PetscSFBcastEnd(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
335f659e5c7SJunchao Zhang   ierr = PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr); /* Get send info */
336cd620004SJunchao Zhang   esf_nranks = esf_ndranks = esf_nleaves = 0;
337b23bfdefSJunchao Zhang   for (i=0; i<nranks; i++) {
338cd620004SJunchao Zhang     connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */
339cd620004SJunchao Zhang     for (j=roffset[i]; j<roffset[i+1]; j++) {if (leafdata[rmine[j]]) {esf_nleaves++; connected = PETSC_TRUE;}}
340f659e5c7SJunchao Zhang     if (connected) {esf_nranks++; if (i < ndranks) esf_ndranks++;}
341f659e5c7SJunchao Zhang   }
342f659e5c7SJunchao Zhang 
343f659e5c7SJunchao Zhang   /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
344cd620004SJunchao Zhang   ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr);
345cd620004SJunchao Zhang   ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr);
346cd620004SJunchao Zhang   ierr = PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,esf_nleaves,&esf_rmine,esf_nleaves,&esf_rremote);CHKERRQ(ierr);
347f659e5c7SJunchao Zhang   p    = 0; /* Counter for connected root ranks */
348f659e5c7SJunchao Zhang   q    = 0; /* Counter for connected leaves */
349f659e5c7SJunchao Zhang   esf_roffset[0] = 0;
350f659e5c7SJunchao Zhang   for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */
351f659e5c7SJunchao Zhang     connected = PETSC_FALSE;
352cd620004SJunchao Zhang     for (j=roffset[i]; j<roffset[i+1]; j++) {
353cd620004SJunchao Zhang       if (leafdata[rmine[j]]) {
354f659e5c7SJunchao Zhang         esf_rmine[q]         = new_ilocal[q] = rmine[j];
355f659e5c7SJunchao Zhang         esf_rremote[q]       = rremote[j];
356f659e5c7SJunchao Zhang         new_iremote[q].index = rremote[j];
357f659e5c7SJunchao Zhang         new_iremote[q].rank  = ranks[i];
358f659e5c7SJunchao Zhang         connected            = PETSC_TRUE;
359f659e5c7SJunchao Zhang         q++;
360f659e5c7SJunchao Zhang       }
361f659e5c7SJunchao Zhang     }
362f659e5c7SJunchao Zhang     if (connected) {
363f659e5c7SJunchao Zhang       esf_ranks[p]     = ranks[i];
364f659e5c7SJunchao Zhang       esf_roffset[p+1] = q;
365f659e5c7SJunchao Zhang       p++;
366f659e5c7SJunchao Zhang     }
367f659e5c7SJunchao Zhang   }
368f659e5c7SJunchao Zhang 
369f659e5c7SJunchao Zhang   /* SetGraph internally resets the SF, so we only set its fields after the call */
370cd620004SJunchao Zhang   ierr           = PetscSFSetGraph(esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
371f659e5c7SJunchao Zhang   esf->nranks    = esf_nranks;
372f659e5c7SJunchao Zhang   esf->ndranks   = esf_ndranks;
373f659e5c7SJunchao Zhang   esf->ranks     = esf_ranks;
374f659e5c7SJunchao Zhang   esf->roffset   = esf_roffset;
375f659e5c7SJunchao Zhang   esf->rmine     = esf_rmine;
376f659e5c7SJunchao Zhang   esf->rremote   = esf_rremote;
377cd620004SJunchao Zhang   esf->nleafreqs = esf_nranks - esf_ndranks;
378f659e5c7SJunchao Zhang 
379f659e5c7SJunchao Zhang   /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
380f659e5c7SJunchao Zhang   bas  = (PetscSF_Basic*)esf->data;
381f659e5c7SJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr); /* Get recv info */
382f659e5c7SJunchao Zhang   /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we
383cd620004SJunchao Zhang      we do not care since these arrays are usually short. The benefit is we can fill these arrays by just parsing irootloc once.
384f659e5c7SJunchao Zhang    */
385f659e5c7SJunchao Zhang   ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr);
386f659e5c7SJunchao Zhang   ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr);
387f659e5c7SJunchao Zhang   bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
388f659e5c7SJunchao Zhang   p = 0; /* Counter for connected leaf ranks */
389f659e5c7SJunchao Zhang   q = 0; /* Counter for connected roots */
390f659e5c7SJunchao Zhang   for (i=0; i<niranks; i++) {
391f659e5c7SJunchao Zhang     connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
392f659e5c7SJunchao Zhang     for (j=ioffset[i]; j<ioffset[i+1]; j++) {
393cd620004SJunchao Zhang       if (rootdata[irootloc[j]]) {
394f659e5c7SJunchao Zhang         bas->irootloc[q++] = irootloc[j];
395f659e5c7SJunchao Zhang         connected = PETSC_TRUE;
396f659e5c7SJunchao Zhang       }
397f659e5c7SJunchao Zhang     }
398f659e5c7SJunchao Zhang     if (connected) {
399f659e5c7SJunchao Zhang       bas->niranks++;
400f659e5c7SJunchao Zhang       if (i<ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */
401f659e5c7SJunchao Zhang       bas->iranks[p]    = iranks[i];
402f659e5c7SJunchao Zhang       bas->ioffset[p+1] = q;
403f659e5c7SJunchao Zhang       p++;
404f659e5c7SJunchao Zhang     }
405f659e5c7SJunchao Zhang   }
406f659e5c7SJunchao Zhang   bas->itotal     = q;
407cd620004SJunchao Zhang   bas->nrootreqs  = bas->niranks - bas->ndiranks;
408cd620004SJunchao Zhang   esf->persistent = PETSC_TRUE;
409cd620004SJunchao Zhang   /* Setup packing related fields */
410cd620004SJunchao Zhang   ierr = PetscSFSetUpPackFields(esf);CHKERRQ(ierr);
411f659e5c7SJunchao Zhang 
41220c24465SJunchao Zhang   /* Copy from PetscSFSetUp(), since this method wants to skip PetscSFSetUp(). */
41320c24465SJunchao Zhang #if defined(PETSC_HAVE_CUDA)
41420c24465SJunchao Zhang   if (esf->backend == PETSCSF_BACKEND_CUDA) {
41520c24465SJunchao Zhang     esf->ops->Malloc = PetscSFMalloc_Cuda;
41620c24465SJunchao Zhang     esf->ops->Free   = PetscSFFree_Cuda;
41720c24465SJunchao Zhang   }
41820c24465SJunchao Zhang #endif
41920c24465SJunchao Zhang 
42059af0bd3SScott Kruger #if defined(PETSC_HAVE_HIP)
42159af0bd3SScott Kruger   /* TODO: Needs debugging */
42259af0bd3SScott Kruger   if (esf->backend == PETSCSF_BACKEND_HIP) {
42359af0bd3SScott Kruger     esf->ops->Malloc = PetscSFMalloc_HIP;
42459af0bd3SScott Kruger     esf->ops->Free   = PetscSFFree_HIP;
42559af0bd3SScott Kruger   }
42659af0bd3SScott Kruger #endif
42759af0bd3SScott Kruger 
42820c24465SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS)
42920c24465SJunchao Zhang   if (esf->backend == PETSCSF_BACKEND_KOKKOS) {
43020c24465SJunchao Zhang     esf->ops->Malloc = PetscSFMalloc_Kokkos;
43120c24465SJunchao Zhang     esf->ops->Free   = PetscSFFree_Kokkos;
43220c24465SJunchao Zhang   }
43320c24465SJunchao Zhang #endif
434f659e5c7SJunchao Zhang   esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
435cd620004SJunchao Zhang   ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr);
436f659e5c7SJunchao Zhang   *newsf = esf;
437f659e5c7SJunchao Zhang   PetscFunctionReturn(0);
438f659e5c7SJunchao Zhang }
439f659e5c7SJunchao Zhang 
4408cc058d9SJed Brown PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
44195fce210SBarry Smith {
44240e23c03SJunchao Zhang   PetscSF_Basic  *dat;
44395fce210SBarry Smith   PetscErrorCode ierr;
44495fce210SBarry Smith 
44595fce210SBarry Smith   PetscFunctionBegin;
44695fce210SBarry Smith   sf->ops->SetUp                = PetscSFSetUp_Basic;
44795fce210SBarry Smith   sf->ops->Reset                = PetscSFReset_Basic;
44895fce210SBarry Smith   sf->ops->Destroy              = PetscSFDestroy_Basic;
44995fce210SBarry Smith   sf->ops->View                 = PetscSFView_Basic;
450*ad227feaSJunchao Zhang   sf->ops->BcastBegin           = PetscSFBcastBegin_Basic;
451*ad227feaSJunchao Zhang   sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
45295fce210SBarry Smith   sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
45395fce210SBarry Smith   sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
45495fce210SBarry Smith   sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
45595fce210SBarry Smith   sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
4568750ddebSJunchao Zhang   sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
457f659e5c7SJunchao Zhang   sf->ops->CreateEmbeddedSF     = PetscSFCreateEmbeddedSF_Basic;
45895fce210SBarry Smith 
45940e23c03SJunchao Zhang   ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr);
46040e23c03SJunchao Zhang   sf->data = (void*)dat;
46195fce210SBarry Smith   PetscFunctionReturn(0);
46295fce210SBarry Smith }
463