xref: /petsc/src/vec/is/sf/impls/basic/sfbasic.c (revision 28b400f66ebc7ae0049166a2294dfcd3df27e64b)
140e23c03SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfbasic.h>
2cd620004SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfpack.h>
353dd6d7dSJunchao Zhang #include <petsc/private/viewerimpl.h>
4b23bfdefSJunchao Zhang 
540e23c03SJunchao Zhang /*===================================================================================*/
640e23c03SJunchao Zhang /*              SF public interface implementations                                  */
740e23c03SJunchao Zhang /*===================================================================================*/
840e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
995fce210SBarry Smith {
10b23bfdefSJunchao Zhang   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
1171438e86SJunchao Zhang   PetscInt       *rlengths,*ilengths,i,nRemoteRootRanks,nRemoteLeafRanks;
1240e23c03SJunchao Zhang   PetscMPIInt    rank,niranks,*iranks,tag;
1395fce210SBarry Smith   MPI_Comm       comm;
14b5a8e515SJed Brown   MPI_Group      group;
1540e23c03SJunchao Zhang   MPI_Request    *rootreqs,*leafreqs;
1695fce210SBarry Smith 
1795fce210SBarry Smith   PetscFunctionBegin;
185f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_group(PETSC_COMM_SELF,&group));
195f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFSetUpRanks(sf,group));
205f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Group_free(&group));
215f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectGetComm((PetscObject)sf,&comm));
225f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectGetNewTag((PetscObject)sf,&tag));
235f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_rank(comm,&rank));
2495fce210SBarry Smith   /*
2595fce210SBarry Smith    * Inform roots about how many leaves and from which ranks
2695fce210SBarry Smith    */
275f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc1(sf->nranks,&rlengths));
28cd620004SJunchao Zhang   /* Determine number, sending ranks and length of incoming */
2995fce210SBarry Smith   for (i=0; i<sf->nranks; i++) {
3095fce210SBarry Smith     rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */
3195fce210SBarry Smith   }
3271438e86SJunchao Zhang   nRemoteRootRanks = sf->nranks-sf->ndranks;
335f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscCommBuildTwoSided(comm,1,MPIU_INT,nRemoteRootRanks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths));
34c943f53fSJed Brown 
350b899082SJunchao Zhang   /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why.
360b899082SJunchao Zhang      We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is
370b899082SJunchao Zhang      small and the sorting is cheap.
380b899082SJunchao Zhang    */
395f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSortMPIIntWithIntArray(niranks,iranks,ilengths));
400b899082SJunchao Zhang 
41c943f53fSJed Brown   /* Partition into distinguished and non-distinguished incoming ranks */
42c943f53fSJed Brown   bas->ndiranks = sf->ndranks;
43c943f53fSJed Brown   bas->niranks = bas->ndiranks + niranks;
445f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset));
45c943f53fSJed Brown   bas->ioffset[0] = 0;
46c943f53fSJed Brown   for (i=0; i<bas->ndiranks; i++) {
47c943f53fSJed Brown     bas->iranks[i] = sf->ranks[i];
48c943f53fSJed Brown     bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i];
49c943f53fSJed Brown   }
502c71b3e2SJacob Faibussowitsch   PetscCheckFalse(bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank),PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks");
5140e23c03SJunchao Zhang   for (; i<bas->niranks; i++) {
52c943f53fSJed Brown     bas->iranks[i] = iranks[i-bas->ndiranks];
53c943f53fSJed Brown     bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks];
54c943f53fSJed Brown   }
55c943f53fSJed Brown   bas->itotal = bas->ioffset[i];
565f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(rlengths));
575f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(iranks));
585f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(ilengths));
5995fce210SBarry Smith 
6095fce210SBarry Smith   /* Send leaf identities to roots */
6171438e86SJunchao Zhang   nRemoteLeafRanks = bas->niranks-bas->ndiranks;
625f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc1(bas->itotal,&bas->irootloc));
635f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc2(nRemoteLeafRanks,&rootreqs,nRemoteRootRanks,&leafreqs));
6440e23c03SJunchao Zhang   for (i=bas->ndiranks; i<bas->niranks; i++) {
655f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPIU_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]));
6640e23c03SJunchao Zhang   }
6740e23c03SJunchao Zhang   for (i=0; i<sf->nranks; i++) {
68c87b50c4SJunchao Zhang     PetscInt npoints = sf->roffset[i+1] - sf->roffset[i];
6940e23c03SJunchao Zhang     if (i < sf->ndranks) {
702c71b3e2SJacob Faibussowitsch       PetscCheckFalse(sf->ranks[i] != rank,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank");
712c71b3e2SJacob Faibussowitsch       PetscCheckFalse(bas->iranks[0] != rank,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank");
722c71b3e2SJacob Faibussowitsch       PetscCheckFalse(npoints != bas->ioffset[1]-bas->ioffset[0],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths");
735f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints));
74c943f53fSJed Brown       continue;
75c943f53fSJed Brown     }
765f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPIU_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]));
77bf39f1bfSJed Brown   }
785f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Waitall(nRemoteLeafRanks,rootreqs,MPI_STATUSES_IGNORE));
795f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Waitall(nRemoteRootRanks,leafreqs,MPI_STATUSES_IGNORE));
8095fce210SBarry Smith 
8171438e86SJunchao Zhang   sf->nleafreqs  = nRemoteRootRanks;
8271438e86SJunchao Zhang   bas->nrootreqs = nRemoteLeafRanks;
83cd620004SJunchao Zhang   sf->persistent = PETSC_TRUE;
84eb02082bSJunchao Zhang 
8571438e86SJunchao Zhang   /* Setup fields related to packing, such as rootbuflen[] */
865f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFSetUpPackFields(sf));
875f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree2(rootreqs,leafreqs));
8895fce210SBarry Smith   PetscFunctionReturn(0);
8995fce210SBarry Smith }
9095fce210SBarry Smith 
9140e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
9295fce210SBarry Smith {
93cd620004SJunchao Zhang   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
9471438e86SJunchao Zhang   PetscSFLink       link = bas->avail,next;
9595fce210SBarry Smith 
9695fce210SBarry Smith   PetscFunctionBegin;
97*28b400f6SJacob Faibussowitsch   PetscCheck(!bas->inuse,PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
985f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree2(bas->iranks,bas->ioffset));
995f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(bas->irootloc));
10071438e86SJunchao Zhang 
1017fd2d3dbSJunchao Zhang  #if defined(PETSC_HAVE_DEVICE)
1025f80ce2aSJacob Faibussowitsch   for (PetscInt i=0; i<2; i++) CHKERRQ(PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,bas->irootloc_d[i]));
103eb02082bSJunchao Zhang  #endif
10471438e86SJunchao Zhang 
10571438e86SJunchao Zhang  #if defined(PETSC_HAVE_NVSHMEM)
1065f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFReset_Basic_NVSHMEM(sf));
10771438e86SJunchao Zhang  #endif
10871438e86SJunchao Zhang 
1095f80ce2aSJacob Faibussowitsch   for (; link; link=next) {next = link->next; CHKERRQ(PetscSFLinkDestroy(sf,link));}
11071438e86SJunchao Zhang   bas->avail = NULL;
1115f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFResetPackFields(sf));
11295fce210SBarry Smith   PetscFunctionReturn(0);
11395fce210SBarry Smith }
11495fce210SBarry Smith 
11540e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
11695fce210SBarry Smith {
11795fce210SBarry Smith   PetscFunctionBegin;
1185f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFReset_Basic(sf));
1195f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(sf->data));
12095fce210SBarry Smith   PetscFunctionReturn(0);
12195fce210SBarry Smith }
12295fce210SBarry Smith 
12362152dedSBarry Smith #if defined(PETSC_USE_SINGLE_LIBRARY)
12462152dedSBarry Smith #include <petscmat.h>
12562152dedSBarry Smith 
12662152dedSBarry Smith PETSC_INTERN PetscErrorCode PetscSFView_Basic_PatternAndSizes(PetscSF sf,PetscViewer viewer)
12762152dedSBarry Smith {
12862152dedSBarry Smith   PetscSF_Basic        *bas = (PetscSF_Basic*)sf->data;
12953dd6d7dSJunchao Zhang   PetscInt             i,nrootranks,ndrootranks;
13062152dedSBarry Smith   const PetscInt       *rootoffset;
13162152dedSBarry Smith   PetscMPIInt          rank,size;
13253dd6d7dSJunchao Zhang   const PetscMPIInt    *rootranks;
13362152dedSBarry Smith   MPI_Comm             comm = PetscObjectComm((PetscObject)sf);
13453dd6d7dSJunchao Zhang   PetscScalar          unitbytes;
13562152dedSBarry Smith   Mat                  A;
13662152dedSBarry Smith 
13762152dedSBarry Smith   PetscFunctionBegin;
1385f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_size(comm,&size));
1395f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_rank(comm,&rank));
14053dd6d7dSJunchao Zhang   /* PetscSFView is most useful for the SF used in VecScatterBegin/End in MatMult etc, where we do
14153dd6d7dSJunchao Zhang     PetscSFBcast, i.e., roots send data to leaves.  We dump the communication pattern into a matrix
14253dd6d7dSJunchao Zhang     in senders' view point: how many bytes I will send to my neighbors.
14353dd6d7dSJunchao Zhang 
14453dd6d7dSJunchao Zhang     Looking at a column of the matrix, one can also know how many bytes the rank will receive from others.
14553dd6d7dSJunchao Zhang 
14653dd6d7dSJunchao Zhang     If PetscSFLink bas->inuse is available, we can use that to get tree vertex size. But that would give
14753dd6d7dSJunchao Zhang     different interpretations for the same SF for different data types. Since we most care about VecScatter,
14853dd6d7dSJunchao Zhang     we uniformly treat each vertex as a PetscScalar.
14953dd6d7dSJunchao Zhang   */
15053dd6d7dSJunchao Zhang   unitbytes = (PetscScalar)sizeof(PetscScalar);
15153dd6d7dSJunchao Zhang 
1525f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,NULL));
1535f80ce2aSJacob Faibussowitsch   CHKERRQ(MatCreateAIJ(comm,1,1,size,size,1,NULL,nrootranks-ndrootranks,NULL,&A));
1545f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSetOptionsPrefix(A,"__petsc_internal__")); /* To prevent the internal A from taking any command line options */
15562152dedSBarry Smith   for (i=0; i<nrootranks; i++) {
1565f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSetValue(A,(PetscInt)rank,bas->iranks[i],(rootoffset[i+1]-rootoffset[i])*unitbytes,INSERT_VALUES));
15762152dedSBarry Smith   }
1585f80ce2aSJacob Faibussowitsch   CHKERRQ(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
1595f80ce2aSJacob Faibussowitsch   CHKERRQ(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
1605f80ce2aSJacob Faibussowitsch   CHKERRQ(MatView(A,viewer));
1615f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&A));
16262152dedSBarry Smith   PetscFunctionReturn(0);
16362152dedSBarry Smith }
16462152dedSBarry Smith #endif
16562152dedSBarry Smith 
16640e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer)
16795fce210SBarry Smith {
16853dd6d7dSJunchao Zhang   PetscBool      isascii;
16995fce210SBarry Smith 
17095fce210SBarry Smith   PetscFunctionBegin;
1715f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii));
1725f80ce2aSJacob Faibussowitsch   if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) CHKERRQ(PetscViewerASCIIPrintf(viewer,"  MultiSF sort=%s\n",sf->rankorder ? "rank-order" : "unordered"));
17362152dedSBarry Smith #if defined(PETSC_USE_SINGLE_LIBRARY)
17453dd6d7dSJunchao Zhang   else {
17553dd6d7dSJunchao Zhang     PetscBool  isdraw,isbinary;
1765f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
1775f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
17853dd6d7dSJunchao Zhang     if ((isascii && viewer->format == PETSC_VIEWER_ASCII_MATLAB) || isdraw || isbinary) {
1795f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscSFView_Basic_PatternAndSizes(sf,viewer));
18053dd6d7dSJunchao Zhang     }
18162152dedSBarry Smith   }
18262152dedSBarry Smith #endif
18395fce210SBarry Smith   PetscFunctionReturn(0);
18495fce210SBarry Smith }
18595fce210SBarry Smith 
186ad227feaSJunchao Zhang static PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
18795fce210SBarry Smith {
188cd620004SJunchao Zhang   PetscSFLink       link = NULL;
18995fce210SBarry Smith 
19095fce210SBarry Smith   PetscFunctionBegin;
19171438e86SJunchao Zhang   /* Create a communication link, which provides buffers, MPI requests etc (if MPI is used) */
1925f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link));
19371438e86SJunchao Zhang   /* Pack rootdata to rootbuf for remote communication */
1945f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata));
19571438e86SJunchao Zhang   /* Start communcation, e.g., post MPI_Isend */
1965f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFLinkStartCommunication(sf,link,PETSCSF_ROOT2LEAF));
19771438e86SJunchao Zhang   /* Do local scatter (i.e., self to self communication), which overlaps with the remote communication above */
1985f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFLinkScatterLocal(sf,link,PETSCSF_ROOT2LEAF,(void*)rootdata,leafdata,op));
19995fce210SBarry Smith   PetscFunctionReturn(0);
20095fce210SBarry Smith }
20195fce210SBarry Smith 
202ad227feaSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
20395fce210SBarry Smith {
204cd620004SJunchao Zhang   PetscSFLink       link = NULL;
20595fce210SBarry Smith 
20695fce210SBarry Smith   PetscFunctionBegin;
207cd620004SJunchao Zhang   /* Retrieve the link used in XxxBegin() with root/leafdata as key */
2085f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link));
20971438e86SJunchao Zhang   /* Finish remote communication, e.g., post MPI_Waitall */
2105f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFLinkFinishCommunication(sf,link,PETSCSF_ROOT2LEAF));
21171438e86SJunchao Zhang   /* Unpack data in leafbuf to leafdata for remote communication */
2125f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafdata,op));
21371438e86SJunchao Zhang   /* Recycle the link */
2145f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFLinkReclaim(sf,&link));
215cd620004SJunchao Zhang   PetscFunctionReturn(0);
216cd620004SJunchao Zhang }
217cd620004SJunchao Zhang 
218cd620004SJunchao Zhang /* Shared by ReduceBegin and FetchAndOpBegin */
2199fbee547SJacob Faibussowitsch static inline PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op,PetscSFOperation sfop,PetscSFLink *out)
220cd620004SJunchao Zhang {
22171438e86SJunchao Zhang   PetscSFLink       link = NULL;
222cd620004SJunchao Zhang 
223cd620004SJunchao Zhang   PetscFunctionBegin;
2245f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,sfop,&link));
2255f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata));
2265f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFLinkStartCommunication(sf,link,PETSCSF_LEAF2ROOT));
227cd620004SJunchao Zhang   *out = link;
22895fce210SBarry Smith   PetscFunctionReturn(0);
22995fce210SBarry Smith }
23095fce210SBarry Smith 
23195fce210SBarry Smith /* leaf -> root with reduction */
232eb02082bSJunchao Zhang static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
23395fce210SBarry Smith {
234cd620004SJunchao Zhang   PetscSFLink       link = NULL;
23595fce210SBarry Smith 
23695fce210SBarry Smith   PetscFunctionBegin;
2375f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_REDUCE,&link));
2385f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFLinkScatterLocal(sf,link,PETSCSF_LEAF2ROOT,rootdata,(void*)leafdata,op));
23995fce210SBarry Smith   PetscFunctionReturn(0);
24095fce210SBarry Smith }
24195fce210SBarry Smith 
24200816365SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
24395fce210SBarry Smith {
244cd620004SJunchao Zhang   PetscSFLink       link = NULL;
24595fce210SBarry Smith 
24695fce210SBarry Smith   PetscFunctionBegin;
2475f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link));
2485f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT));
2495f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFLinkUnpackRootData(sf,link,PETSCSF_REMOTE,rootdata,op));
2505f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFLinkReclaim(sf,&link));
25195fce210SBarry Smith   PetscFunctionReturn(0);
25295fce210SBarry Smith }
25395fce210SBarry Smith 
254eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
25595fce210SBarry Smith {
256cd620004SJunchao Zhang   PetscSFLink       link = NULL;
25795fce210SBarry Smith 
25895fce210SBarry Smith   PetscFunctionBegin;
2595f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_FETCH,&link));
2605f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFLinkFetchAndOpLocal(sf,link,rootdata,leafdata,leafupdate,op));
26195fce210SBarry Smith   PetscFunctionReturn(0);
26295fce210SBarry Smith }
26395fce210SBarry Smith 
26400816365SJunchao Zhang static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
26595fce210SBarry Smith {
266cd620004SJunchao Zhang   PetscSFLink       link = NULL;
26795fce210SBarry Smith 
26895fce210SBarry Smith   PetscFunctionBegin;
2695f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link));
27095fce210SBarry Smith   /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
2715f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT));
272cd620004SJunchao Zhang   /* Do fetch-and-op, the (remote) update results are in rootbuf */
2735f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFLinkFetchAndOpRemote(sf,link,rootdata,op));
274cd620004SJunchao Zhang   /* Bcast rootbuf to leafupdate */
2755f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFLinkStartCommunication(sf,link,PETSCSF_ROOT2LEAF));
2765f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFLinkFinishCommunication(sf,link,PETSCSF_ROOT2LEAF));
277b23bfdefSJunchao Zhang   /* Unpack and insert fetched data into leaves */
2785f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafupdate,MPI_REPLACE));
2795f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFLinkReclaim(sf,&link));
28095fce210SBarry Smith   PetscFunctionReturn(0);
28195fce210SBarry Smith }
28295fce210SBarry Smith 
28340e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
2848750ddebSJunchao Zhang {
2858750ddebSJunchao Zhang   PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
2868750ddebSJunchao Zhang 
2878750ddebSJunchao Zhang   PetscFunctionBegin;
2888750ddebSJunchao Zhang   if (niranks)  *niranks  = bas->niranks;
2898750ddebSJunchao Zhang   if (iranks)   *iranks   = bas->iranks;
2908750ddebSJunchao Zhang   if (ioffset)  *ioffset  = bas->ioffset;
2918750ddebSJunchao Zhang   if (irootloc) *irootloc = bas->irootloc;
2928750ddebSJunchao Zhang   PetscFunctionReturn(0);
2938750ddebSJunchao Zhang }
2948750ddebSJunchao Zhang 
29572502a1fSJunchao Zhang /* An optimized PetscSFCreateEmbeddedRootSF. We aggresively make use of the established communication on sf.
296f659e5c7SJunchao Zhang    We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[]
297f659e5c7SJunchao Zhang    was sorted before calling the routine.
298f659e5c7SJunchao Zhang  */
29972502a1fSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedRootSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
300f659e5c7SJunchao Zhang {
301f659e5c7SJunchao Zhang   PetscSF           esf;
302cd620004SJunchao Zhang   PetscInt          esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote;
303cd620004SJunchao Zhang   PetscInt          i,j,p,q,nroots,esf_nleaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal;
304cd620004SJunchao Zhang   char              *rootdata,*leafdata,*leafmem; /* Only stores 0 or 1, so we can save memory with char */
305f659e5c7SJunchao Zhang   PetscMPIInt       *esf_ranks;
306f659e5c7SJunchao Zhang   const PetscMPIInt *ranks,*iranks;
307cd620004SJunchao Zhang   const PetscInt    *roffset,*rmine,*rremote,*ioffset,*irootloc;
308f659e5c7SJunchao Zhang   PetscBool         connected;
309f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
310f659e5c7SJunchao Zhang   PetscSF_Basic     *bas;
311f659e5c7SJunchao Zhang 
312f659e5c7SJunchao Zhang   PetscFunctionBegin;
3135f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf));
3145f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFSetFromOptions(esf));
3155f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFSetType(esf,PETSCSFBASIC)); /* This optimized routine can only create a basic sf */
316f659e5c7SJunchao Zhang 
317cd620004SJunchao Zhang   /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */
3185f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL));
3195f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFGetLeafRange(sf,&minleaf,&maxleaf));
320cd620004SJunchao Zhang   maxlocal = maxleaf - minleaf + 1;
3215f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem));
322cd620004SJunchao Zhang   leafdata = leafmem - minleaf;
323f659e5c7SJunchao Zhang   /* Tag selected roots */
324f659e5c7SJunchao Zhang   for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1;
325f659e5c7SJunchao Zhang 
3265f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFBcastBegin(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE));
3275f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFBcastEnd(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE));
3285f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote)); /* Get send info */
329cd620004SJunchao Zhang   esf_nranks = esf_ndranks = esf_nleaves = 0;
330b23bfdefSJunchao Zhang   for (i=0; i<nranks; i++) {
331cd620004SJunchao Zhang     connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */
332cd620004SJunchao Zhang     for (j=roffset[i]; j<roffset[i+1]; j++) {if (leafdata[rmine[j]]) {esf_nleaves++; connected = PETSC_TRUE;}}
333f659e5c7SJunchao Zhang     if (connected) {esf_nranks++; if (i < ndranks) esf_ndranks++;}
334f659e5c7SJunchao Zhang   }
335f659e5c7SJunchao Zhang 
336f659e5c7SJunchao Zhang   /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
3375f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc1(esf_nleaves,&new_ilocal));
3385f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc1(esf_nleaves,&new_iremote));
3395f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,esf_nleaves,&esf_rmine,esf_nleaves,&esf_rremote));
340f659e5c7SJunchao Zhang   p    = 0; /* Counter for connected root ranks */
341f659e5c7SJunchao Zhang   q    = 0; /* Counter for connected leaves */
342f659e5c7SJunchao Zhang   esf_roffset[0] = 0;
343f659e5c7SJunchao Zhang   for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */
344f659e5c7SJunchao Zhang     connected = PETSC_FALSE;
345cd620004SJunchao Zhang     for (j=roffset[i]; j<roffset[i+1]; j++) {
346cd620004SJunchao Zhang       if (leafdata[rmine[j]]) {
347f659e5c7SJunchao Zhang         esf_rmine[q]         = new_ilocal[q] = rmine[j];
348f659e5c7SJunchao Zhang         esf_rremote[q]       = rremote[j];
349f659e5c7SJunchao Zhang         new_iremote[q].index = rremote[j];
350f659e5c7SJunchao Zhang         new_iremote[q].rank  = ranks[i];
351f659e5c7SJunchao Zhang         connected            = PETSC_TRUE;
352f659e5c7SJunchao Zhang         q++;
353f659e5c7SJunchao Zhang       }
354f659e5c7SJunchao Zhang     }
355f659e5c7SJunchao Zhang     if (connected) {
356f659e5c7SJunchao Zhang       esf_ranks[p]     = ranks[i];
357f659e5c7SJunchao Zhang       esf_roffset[p+1] = q;
358f659e5c7SJunchao Zhang       p++;
359f659e5c7SJunchao Zhang     }
360f659e5c7SJunchao Zhang   }
361f659e5c7SJunchao Zhang 
362f659e5c7SJunchao Zhang   /* SetGraph internally resets the SF, so we only set its fields after the call */
3635f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFSetGraph(esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER));
364f659e5c7SJunchao Zhang   esf->nranks    = esf_nranks;
365f659e5c7SJunchao Zhang   esf->ndranks   = esf_ndranks;
366f659e5c7SJunchao Zhang   esf->ranks     = esf_ranks;
367f659e5c7SJunchao Zhang   esf->roffset   = esf_roffset;
368f659e5c7SJunchao Zhang   esf->rmine     = esf_rmine;
369f659e5c7SJunchao Zhang   esf->rremote   = esf_rremote;
370cd620004SJunchao Zhang   esf->nleafreqs = esf_nranks - esf_ndranks;
371f659e5c7SJunchao Zhang 
372f659e5c7SJunchao Zhang   /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
373f659e5c7SJunchao Zhang   bas  = (PetscSF_Basic*)esf->data;
3745f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc)); /* Get recv info */
375f659e5c7SJunchao Zhang   /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we
376cd620004SJunchao Zhang      we do not care since these arrays are usually short. The benefit is we can fill these arrays by just parsing irootloc once.
377f659e5c7SJunchao Zhang    */
3785f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset));
3795f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc1(ioffset[niranks],&bas->irootloc));
380f659e5c7SJunchao Zhang   bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
381f659e5c7SJunchao Zhang   p = 0; /* Counter for connected leaf ranks */
382f659e5c7SJunchao Zhang   q = 0; /* Counter for connected roots */
383f659e5c7SJunchao Zhang   for (i=0; i<niranks; i++) {
384f659e5c7SJunchao Zhang     connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
385f659e5c7SJunchao Zhang     for (j=ioffset[i]; j<ioffset[i+1]; j++) {
386cd620004SJunchao Zhang       if (rootdata[irootloc[j]]) {
387f659e5c7SJunchao Zhang         bas->irootloc[q++] = irootloc[j];
388f659e5c7SJunchao Zhang         connected = PETSC_TRUE;
389f659e5c7SJunchao Zhang       }
390f659e5c7SJunchao Zhang     }
391f659e5c7SJunchao Zhang     if (connected) {
392f659e5c7SJunchao Zhang       bas->niranks++;
393f659e5c7SJunchao Zhang       if (i<ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */
394f659e5c7SJunchao Zhang       bas->iranks[p]    = iranks[i];
395f659e5c7SJunchao Zhang       bas->ioffset[p+1] = q;
396f659e5c7SJunchao Zhang       p++;
397f659e5c7SJunchao Zhang     }
398f659e5c7SJunchao Zhang   }
399f659e5c7SJunchao Zhang   bas->itotal     = q;
400cd620004SJunchao Zhang   bas->nrootreqs  = bas->niranks - bas->ndiranks;
401cd620004SJunchao Zhang   esf->persistent = PETSC_TRUE;
402cd620004SJunchao Zhang   /* Setup packing related fields */
4035f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscSFSetUpPackFields(esf));
404f659e5c7SJunchao Zhang 
40520c24465SJunchao Zhang   /* Copy from PetscSFSetUp(), since this method wants to skip PetscSFSetUp(). */
40620c24465SJunchao Zhang #if defined(PETSC_HAVE_CUDA)
40720c24465SJunchao Zhang   if (esf->backend == PETSCSF_BACKEND_CUDA) {
40871438e86SJunchao Zhang     esf->ops->Malloc = PetscSFMalloc_CUDA;
40971438e86SJunchao Zhang     esf->ops->Free   = PetscSFFree_CUDA;
41020c24465SJunchao Zhang   }
41120c24465SJunchao Zhang #endif
41220c24465SJunchao Zhang 
41359af0bd3SScott Kruger #if defined(PETSC_HAVE_HIP)
41459af0bd3SScott Kruger   /* TODO: Needs debugging */
41559af0bd3SScott Kruger   if (esf->backend == PETSCSF_BACKEND_HIP) {
41659af0bd3SScott Kruger     esf->ops->Malloc = PetscSFMalloc_HIP;
41759af0bd3SScott Kruger     esf->ops->Free   = PetscSFFree_HIP;
41859af0bd3SScott Kruger   }
41959af0bd3SScott Kruger #endif
42059af0bd3SScott Kruger 
42120c24465SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS)
42220c24465SJunchao Zhang   if (esf->backend == PETSCSF_BACKEND_KOKKOS) {
42320c24465SJunchao Zhang     esf->ops->Malloc = PetscSFMalloc_Kokkos;
42420c24465SJunchao Zhang     esf->ops->Free   = PetscSFFree_Kokkos;
42520c24465SJunchao Zhang   }
42620c24465SJunchao Zhang #endif
427f659e5c7SJunchao Zhang   esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
4285f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree2(rootdata,leafmem));
429f659e5c7SJunchao Zhang   *newsf = esf;
430f659e5c7SJunchao Zhang   PetscFunctionReturn(0);
431f659e5c7SJunchao Zhang }
432f659e5c7SJunchao Zhang 
4338cc058d9SJed Brown PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
43495fce210SBarry Smith {
43540e23c03SJunchao Zhang   PetscSF_Basic  *dat;
43695fce210SBarry Smith 
43795fce210SBarry Smith   PetscFunctionBegin;
43895fce210SBarry Smith   sf->ops->SetUp                = PetscSFSetUp_Basic;
43995fce210SBarry Smith   sf->ops->Reset                = PetscSFReset_Basic;
44095fce210SBarry Smith   sf->ops->Destroy              = PetscSFDestroy_Basic;
44195fce210SBarry Smith   sf->ops->View                 = PetscSFView_Basic;
442ad227feaSJunchao Zhang   sf->ops->BcastBegin           = PetscSFBcastBegin_Basic;
443ad227feaSJunchao Zhang   sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
44495fce210SBarry Smith   sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
44595fce210SBarry Smith   sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
44695fce210SBarry Smith   sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
44795fce210SBarry Smith   sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
4488750ddebSJunchao Zhang   sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
44972502a1fSJunchao Zhang   sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;
45095fce210SBarry Smith 
4515f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscNewLog(sf,&dat));
45240e23c03SJunchao Zhang   sf->data = (void*)dat;
45395fce210SBarry Smith   PetscFunctionReturn(0);
45495fce210SBarry Smith }
455