xref: /petsc/src/vec/is/sf/impls/basic/sfbasic.c (revision 53dd6d7dbccec090ec8f172c916aeefdd1842daa)
140e23c03SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfbasic.h>
2cd620004SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfpack.h>
3*53dd6d7dSJunchao Zhang #include <petsc/private/viewerimpl.h>
4b23bfdefSJunchao Zhang 
540e23c03SJunchao Zhang /*===================================================================================*/
640e23c03SJunchao Zhang /*              SF public interface implementations                                  */
740e23c03SJunchao Zhang /*===================================================================================*/
840e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
995fce210SBarry Smith {
1095fce210SBarry Smith   PetscErrorCode ierr;
11b23bfdefSJunchao Zhang   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
1271438e86SJunchao Zhang   PetscInt       *rlengths,*ilengths,i,nRemoteRootRanks,nRemoteLeafRanks;
1340e23c03SJunchao Zhang   PetscMPIInt    rank,niranks,*iranks,tag;
1495fce210SBarry Smith   MPI_Comm       comm;
15b5a8e515SJed Brown   MPI_Group      group;
1640e23c03SJunchao Zhang   MPI_Request    *rootreqs,*leafreqs;
1795fce210SBarry Smith 
1895fce210SBarry Smith   PetscFunctionBegin;
19ffc4695bSBarry Smith   ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRMPI(ierr);
20b5a8e515SJed Brown   ierr = PetscSFSetUpRanks(sf,group);CHKERRQ(ierr);
21ffc4695bSBarry Smith   ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
2295fce210SBarry Smith   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
2340e23c03SJunchao Zhang   ierr = PetscObjectGetNewTag((PetscObject)sf,&tag);CHKERRQ(ierr);
24ffc4695bSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
2595fce210SBarry Smith   /*
2695fce210SBarry Smith    * Inform roots about how many leaves and from which ranks
2795fce210SBarry Smith    */
28785e854fSJed Brown   ierr = PetscMalloc1(sf->nranks,&rlengths);CHKERRQ(ierr);
29cd620004SJunchao Zhang   /* Determine number, sending ranks and length of incoming */
3095fce210SBarry Smith   for (i=0; i<sf->nranks; i++) {
3195fce210SBarry Smith     rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */
3295fce210SBarry Smith   }
3371438e86SJunchao Zhang   nRemoteRootRanks = sf->nranks-sf->ndranks;
3471438e86SJunchao Zhang   ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,nRemoteRootRanks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);CHKERRQ(ierr);
35c943f53fSJed Brown 
360b899082SJunchao Zhang   /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why.
370b899082SJunchao Zhang      We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is
380b899082SJunchao Zhang      small and the sorting is cheap.
390b899082SJunchao Zhang    */
400b899082SJunchao Zhang   ierr = PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);CHKERRQ(ierr);
410b899082SJunchao Zhang 
42c943f53fSJed Brown   /* Partition into distinguished and non-distinguished incoming ranks */
43c943f53fSJed Brown   bas->ndiranks = sf->ndranks;
44c943f53fSJed Brown   bas->niranks = bas->ndiranks + niranks;
45c943f53fSJed Brown   ierr = PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);CHKERRQ(ierr);
46c943f53fSJed Brown   bas->ioffset[0] = 0;
47c943f53fSJed Brown   for (i=0; i<bas->ndiranks; i++) {
48c943f53fSJed Brown     bas->iranks[i] = sf->ranks[i];
49c943f53fSJed Brown     bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i];
50c943f53fSJed Brown   }
5140e23c03SJunchao Zhang   if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks");
5240e23c03SJunchao Zhang   for (; i<bas->niranks; i++) {
53c943f53fSJed Brown     bas->iranks[i] = iranks[i-bas->ndiranks];
54c943f53fSJed Brown     bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks];
55c943f53fSJed Brown   }
56c943f53fSJed Brown   bas->itotal = bas->ioffset[i];
5795fce210SBarry Smith   ierr = PetscFree(rlengths);CHKERRQ(ierr);
58c943f53fSJed Brown   ierr = PetscFree(iranks);CHKERRQ(ierr);
59c943f53fSJed Brown   ierr = PetscFree(ilengths);CHKERRQ(ierr);
6095fce210SBarry Smith 
6195fce210SBarry Smith   /* Send leaf identities to roots */
6271438e86SJunchao Zhang   nRemoteLeafRanks = bas->niranks-bas->ndiranks;
63c943f53fSJed Brown   ierr = PetscMalloc1(bas->itotal,&bas->irootloc);CHKERRQ(ierr);
6471438e86SJunchao Zhang   ierr = PetscMalloc2(nRemoteLeafRanks,&rootreqs,nRemoteRootRanks,&leafreqs);CHKERRQ(ierr);
6540e23c03SJunchao Zhang   for (i=bas->ndiranks; i<bas->niranks; i++) {
66ffc4695bSBarry Smith     ierr = MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);CHKERRMPI(ierr);
6740e23c03SJunchao Zhang   }
6840e23c03SJunchao Zhang   for (i=0; i<sf->nranks; i++) {
6995fce210SBarry Smith     PetscMPIInt npoints;
7095fce210SBarry Smith     ierr = PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);CHKERRQ(ierr);
7140e23c03SJunchao Zhang     if (i < sf->ndranks) {
7240e23c03SJunchao Zhang       if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank");
7340e23c03SJunchao Zhang       if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank");
7440e23c03SJunchao Zhang       if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths");
7540e23c03SJunchao Zhang       ierr = PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);CHKERRQ(ierr);
76c943f53fSJed Brown       continue;
77c943f53fSJed Brown     }
78ffc4695bSBarry Smith     ierr = MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);CHKERRMPI(ierr);
79bf39f1bfSJed Brown   }
8071438e86SJunchao Zhang   ierr = MPI_Waitall(nRemoteLeafRanks,rootreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
8171438e86SJunchao Zhang   ierr = MPI_Waitall(nRemoteRootRanks,leafreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
8295fce210SBarry Smith 
8371438e86SJunchao Zhang   sf->nleafreqs  = nRemoteRootRanks;
8471438e86SJunchao Zhang   bas->nrootreqs = nRemoteLeafRanks;
85cd620004SJunchao Zhang   sf->persistent = PETSC_TRUE;
86eb02082bSJunchao Zhang 
8771438e86SJunchao Zhang   /* Setup fields related to packing, such as rootbuflen[] */
88cd620004SJunchao Zhang   ierr = PetscSFSetUpPackFields(sf);CHKERRQ(ierr);
8971438e86SJunchao Zhang   ierr = PetscFree2(rootreqs,leafreqs);CHKERRQ(ierr);
9095fce210SBarry Smith   PetscFunctionReturn(0);
9195fce210SBarry Smith }
9295fce210SBarry Smith 
9340e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
9495fce210SBarry Smith {
9595fce210SBarry Smith   PetscErrorCode    ierr;
96cd620004SJunchao Zhang   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
9771438e86SJunchao Zhang   PetscSFLink       link = bas->avail,next;
9895fce210SBarry Smith 
9995fce210SBarry Smith   PetscFunctionBegin;
10029046d53SLisandro Dalcin   if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
101c943f53fSJed Brown   ierr = PetscFree2(bas->iranks,bas->ioffset);CHKERRQ(ierr);
102c943f53fSJed Brown   ierr = PetscFree(bas->irootloc);CHKERRQ(ierr);
10371438e86SJunchao Zhang 
1047fd2d3dbSJunchao Zhang  #if defined(PETSC_HAVE_DEVICE)
10520c24465SJunchao Zhang   for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,bas->irootloc_d[i]);CHKERRQ(ierr);}
106eb02082bSJunchao Zhang  #endif
10771438e86SJunchao Zhang 
10871438e86SJunchao Zhang  #if defined(PETSC_HAVE_NVSHMEM)
10971438e86SJunchao Zhang   ierr = PetscSFReset_Basic_NVSHMEM(sf);CHKERRQ(ierr);
11071438e86SJunchao Zhang  #endif
11171438e86SJunchao Zhang 
11271438e86SJunchao Zhang   for (; link; link=next) {next = link->next; ierr = PetscSFLinkDestroy(sf,link);CHKERRQ(ierr);}
11371438e86SJunchao Zhang   bas->avail = NULL;
114cd620004SJunchao Zhang   ierr = PetscSFResetPackFields(sf);CHKERRQ(ierr);
11595fce210SBarry Smith   PetscFunctionReturn(0);
11695fce210SBarry Smith }
11795fce210SBarry Smith 
11840e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
11995fce210SBarry Smith {
12095fce210SBarry Smith   PetscErrorCode ierr;
12195fce210SBarry Smith 
12295fce210SBarry Smith   PetscFunctionBegin;
123f6d956f6SStefano Zampini   ierr = PetscSFReset_Basic(sf);CHKERRQ(ierr);
12495fce210SBarry Smith   ierr = PetscFree(sf->data);CHKERRQ(ierr);
12595fce210SBarry Smith   PetscFunctionReturn(0);
12695fce210SBarry Smith }
12795fce210SBarry Smith 
12862152dedSBarry Smith #if defined(PETSC_USE_SINGLE_LIBRARY)
12962152dedSBarry Smith #include <petscmat.h>
13062152dedSBarry Smith 
13162152dedSBarry Smith PETSC_INTERN PetscErrorCode PetscSFView_Basic_PatternAndSizes(PetscSF sf,PetscViewer viewer)
13262152dedSBarry Smith {
13362152dedSBarry Smith   PetscErrorCode       ierr;
13462152dedSBarry Smith   PetscSF_Basic        *bas = (PetscSF_Basic*)sf->data;
135*53dd6d7dSJunchao Zhang   PetscInt             i,nrootranks,ndrootranks;
13662152dedSBarry Smith   const PetscInt       *rootoffset;
13762152dedSBarry Smith   PetscMPIInt          rank,size;
138*53dd6d7dSJunchao Zhang   const PetscMPIInt    *rootranks;
13962152dedSBarry Smith   MPI_Comm             comm = PetscObjectComm((PetscObject)sf);
140*53dd6d7dSJunchao Zhang   PetscScalar          unitbytes;
14162152dedSBarry Smith   Mat                  A;
14262152dedSBarry Smith 
14362152dedSBarry Smith   PetscFunctionBegin;
14462152dedSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
14562152dedSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
146*53dd6d7dSJunchao Zhang   /* PetscSFView is most useful for the SF used in VecScatterBegin/End in MatMult etc, where we do
147*53dd6d7dSJunchao Zhang     PetscSFBcast, i.e., roots send data to leaves.  We dump the communication pattern into a matrix
148*53dd6d7dSJunchao Zhang     in senders' view point: how many bytes I will send to my neighbors.
149*53dd6d7dSJunchao Zhang 
150*53dd6d7dSJunchao Zhang     Looking at a column of the matrix, one can also know how many bytes the rank will receive from others.
151*53dd6d7dSJunchao Zhang 
152*53dd6d7dSJunchao Zhang     If PetscSFLink bas->inuse is available, we can use that to get tree vertex size. But that would give
153*53dd6d7dSJunchao Zhang     different interpretations for the same SF for different data types. Since we most care about VecScatter,
154*53dd6d7dSJunchao Zhang     we uniformly treat each vertex as a PetscScalar.
155*53dd6d7dSJunchao Zhang   */
156*53dd6d7dSJunchao Zhang   unitbytes = (PetscScalar)sizeof(PetscScalar);
157*53dd6d7dSJunchao Zhang 
158*53dd6d7dSJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,NULL);CHKERRQ(ierr);
159*53dd6d7dSJunchao Zhang   ierr = MatCreateAIJ(comm,1,1,size,size,1,NULL,nrootranks-ndrootranks,NULL,&A);CHKERRQ(ierr);
160*53dd6d7dSJunchao Zhang   ierr = MatSetOptionsPrefix(A,"__petsc_internal__");CHKERRQ(ierr); /* To prevent the internal A from taking any command line options */
16162152dedSBarry Smith   for (i=0; i<nrootranks; i++) {
162*53dd6d7dSJunchao Zhang     ierr = MatSetValue(A,(PetscInt)rank,bas->iranks[i],(rootoffset[i+1]-rootoffset[i])*unitbytes,INSERT_VALUES);CHKERRQ(ierr);
16362152dedSBarry Smith   }
16462152dedSBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16562152dedSBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16662152dedSBarry Smith   ierr = MatView(A,viewer);CHKERRQ(ierr);
16762152dedSBarry Smith   ierr = MatDestroy(&A);CHKERRQ(ierr);
16862152dedSBarry Smith   PetscFunctionReturn(0);
16962152dedSBarry Smith }
17062152dedSBarry Smith #endif
17162152dedSBarry Smith 
17240e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer)
17395fce210SBarry Smith {
17495fce210SBarry Smith   PetscErrorCode ierr;
175*53dd6d7dSJunchao Zhang   PetscBool      isascii;
17695fce210SBarry Smith 
17795fce210SBarry Smith   PetscFunctionBegin;
178*53dd6d7dSJunchao Zhang   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
179*53dd6d7dSJunchao Zhang   if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {ierr = PetscViewerASCIIPrintf(viewer,"  MultiSF sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);}
18062152dedSBarry Smith #if defined(PETSC_USE_SINGLE_LIBRARY)
181*53dd6d7dSJunchao Zhang   else {
182*53dd6d7dSJunchao Zhang     PetscBool  isdraw,isbinary;
183*53dd6d7dSJunchao Zhang     ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
184*53dd6d7dSJunchao Zhang     ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
185*53dd6d7dSJunchao Zhang     if ((isascii && viewer->format == PETSC_VIEWER_ASCII_MATLAB) || isdraw || isbinary) {
186*53dd6d7dSJunchao Zhang       ierr = PetscSFView_Basic_PatternAndSizes(sf,viewer);CHKERRQ(ierr);
187*53dd6d7dSJunchao Zhang     }
18862152dedSBarry Smith   }
18962152dedSBarry Smith #endif
19095fce210SBarry Smith   PetscFunctionReturn(0);
19195fce210SBarry Smith }
19295fce210SBarry Smith 
193ad227feaSJunchao Zhang static PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
19495fce210SBarry Smith {
19595fce210SBarry Smith   PetscErrorCode    ierr;
196cd620004SJunchao Zhang   PetscSFLink       link = NULL;
19795fce210SBarry Smith 
19895fce210SBarry Smith   PetscFunctionBegin;
19971438e86SJunchao Zhang   /* Create a communication link, which provides buffers, MPI requests etc (if MPI is used) */
200cd620004SJunchao Zhang   ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link);CHKERRQ(ierr);
20171438e86SJunchao Zhang   /* Pack rootdata to rootbuf for remote communication */
202cd620004SJunchao Zhang   ierr = PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata);CHKERRQ(ierr);
20371438e86SJunchao Zhang   /* Start communcation, e.g., post MPI_Isend */
20471438e86SJunchao Zhang   ierr = PetscSFLinkStartCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
20571438e86SJunchao Zhang   /* Do local scatter (i.e., self to self communication), which overlaps with the remote communication above */
20671438e86SJunchao Zhang   ierr = PetscSFLinkScatterLocal(sf,link,PETSCSF_ROOT2LEAF,(void*)rootdata,leafdata,op);CHKERRQ(ierr);
20795fce210SBarry Smith   PetscFunctionReturn(0);
20895fce210SBarry Smith }
20995fce210SBarry Smith 
210ad227feaSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
21195fce210SBarry Smith {
21295fce210SBarry Smith   PetscErrorCode    ierr;
213cd620004SJunchao Zhang   PetscSFLink       link = NULL;
21495fce210SBarry Smith 
21595fce210SBarry Smith   PetscFunctionBegin;
216cd620004SJunchao Zhang   /* Retrieve the link used in XxxBegin() with root/leafdata as key */
217cd620004SJunchao Zhang   ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
21871438e86SJunchao Zhang   /* Finish remote communication, e.g., post MPI_Waitall */
21971438e86SJunchao Zhang   ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
22071438e86SJunchao Zhang   /* Unpack data in leafbuf to leafdata for remote communication */
221cd620004SJunchao Zhang   ierr = PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafdata,op);CHKERRQ(ierr);
22271438e86SJunchao Zhang   /* Recycle the link */
223cd620004SJunchao Zhang   ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr);
224cd620004SJunchao Zhang   PetscFunctionReturn(0);
225cd620004SJunchao Zhang }
226cd620004SJunchao Zhang 
227cd620004SJunchao Zhang /* Shared by ReduceBegin and FetchAndOpBegin */
228cd620004SJunchao Zhang PETSC_STATIC_INLINE PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op,PetscSFOperation sfop,PetscSFLink *out)
229cd620004SJunchao Zhang {
230cd620004SJunchao Zhang   PetscErrorCode    ierr;
23171438e86SJunchao Zhang   PetscSFLink       link = NULL;
232cd620004SJunchao Zhang 
233cd620004SJunchao Zhang   PetscFunctionBegin;
234cd620004SJunchao Zhang   ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,sfop,&link);CHKERRQ(ierr);
235cd620004SJunchao Zhang   ierr = PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata);CHKERRQ(ierr);
23671438e86SJunchao Zhang   ierr = PetscSFLinkStartCommunication(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr);
237cd620004SJunchao Zhang   *out = link;
23895fce210SBarry Smith   PetscFunctionReturn(0);
23995fce210SBarry Smith }
24095fce210SBarry Smith 
24195fce210SBarry Smith /* leaf -> root with reduction */
242eb02082bSJunchao Zhang static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
24395fce210SBarry Smith {
24495fce210SBarry Smith   PetscErrorCode    ierr;
245cd620004SJunchao Zhang   PetscSFLink       link = NULL;
24695fce210SBarry Smith 
24795fce210SBarry Smith   PetscFunctionBegin;
248cd620004SJunchao Zhang   ierr = PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_REDUCE,&link);CHKERRQ(ierr);
24971438e86SJunchao Zhang   ierr = PetscSFLinkScatterLocal(sf,link,PETSCSF_LEAF2ROOT,rootdata,(void*)leafdata,op);CHKERRQ(ierr);
25095fce210SBarry Smith   PetscFunctionReturn(0);
25195fce210SBarry Smith }
25295fce210SBarry Smith 
25300816365SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
25495fce210SBarry Smith {
25595fce210SBarry Smith   PetscErrorCode    ierr;
256cd620004SJunchao Zhang   PetscSFLink       link = NULL;
25795fce210SBarry Smith 
25895fce210SBarry Smith   PetscFunctionBegin;
259cd620004SJunchao Zhang   ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
26071438e86SJunchao Zhang   ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr);
261cd620004SJunchao Zhang   ierr = PetscSFLinkUnpackRootData(sf,link,PETSCSF_REMOTE,rootdata,op);CHKERRQ(ierr);
262cd620004SJunchao Zhang   ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr);
26395fce210SBarry Smith   PetscFunctionReturn(0);
26495fce210SBarry Smith }
26595fce210SBarry Smith 
266eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
26795fce210SBarry Smith {
26895fce210SBarry Smith   PetscErrorCode    ierr;
269cd620004SJunchao Zhang   PetscSFLink       link = NULL;
27095fce210SBarry Smith 
27195fce210SBarry Smith   PetscFunctionBegin;
272cd620004SJunchao Zhang   ierr = PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_FETCH,&link);CHKERRQ(ierr);
273cd620004SJunchao Zhang   ierr = PetscSFLinkFetchAndOpLocal(sf,link,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
27495fce210SBarry Smith   PetscFunctionReturn(0);
27595fce210SBarry Smith }
27695fce210SBarry Smith 
27700816365SJunchao Zhang static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
27895fce210SBarry Smith {
27995fce210SBarry Smith   PetscErrorCode    ierr;
280cd620004SJunchao Zhang   PetscSFLink       link = NULL;
28195fce210SBarry Smith 
28295fce210SBarry Smith   PetscFunctionBegin;
283cd620004SJunchao Zhang   ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
28495fce210SBarry Smith   /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
28571438e86SJunchao Zhang   ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr);
286cd620004SJunchao Zhang   /* Do fetch-and-op, the (remote) update results are in rootbuf */
28771438e86SJunchao Zhang   ierr = PetscSFLinkFetchAndOpRemote(sf,link,rootdata,op);CHKERRQ(ierr);
288cd620004SJunchao Zhang   /* Bcast rootbuf to leafupdate */
28971438e86SJunchao Zhang   ierr = PetscSFLinkStartCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
29071438e86SJunchao Zhang   ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
291b23bfdefSJunchao Zhang   /* Unpack and insert fetched data into leaves */
29283df288dSJunchao Zhang   ierr = PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafupdate,MPI_REPLACE);CHKERRQ(ierr);
293cd620004SJunchao Zhang   ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr);
29495fce210SBarry Smith   PetscFunctionReturn(0);
29595fce210SBarry Smith }
29695fce210SBarry Smith 
29740e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
2988750ddebSJunchao Zhang {
2998750ddebSJunchao Zhang   PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
3008750ddebSJunchao Zhang 
3018750ddebSJunchao Zhang   PetscFunctionBegin;
3028750ddebSJunchao Zhang   if (niranks)  *niranks  = bas->niranks;
3038750ddebSJunchao Zhang   if (iranks)   *iranks   = bas->iranks;
3048750ddebSJunchao Zhang   if (ioffset)  *ioffset  = bas->ioffset;
3058750ddebSJunchao Zhang   if (irootloc) *irootloc = bas->irootloc;
3068750ddebSJunchao Zhang   PetscFunctionReturn(0);
3078750ddebSJunchao Zhang }
3088750ddebSJunchao Zhang 
30972502a1fSJunchao Zhang /* An optimized PetscSFCreateEmbeddedRootSF. We aggresively make use of the established communication on sf.
310f659e5c7SJunchao Zhang    We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[]
311f659e5c7SJunchao Zhang    was sorted before calling the routine.
312f659e5c7SJunchao Zhang  */
31372502a1fSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedRootSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
314f659e5c7SJunchao Zhang {
315f659e5c7SJunchao Zhang   PetscSF           esf;
316cd620004SJunchao Zhang   PetscInt          esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote;
317cd620004SJunchao Zhang   PetscInt          i,j,p,q,nroots,esf_nleaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal;
318cd620004SJunchao Zhang   char              *rootdata,*leafdata,*leafmem; /* Only stores 0 or 1, so we can save memory with char */
319f659e5c7SJunchao Zhang   PetscMPIInt       *esf_ranks;
320f659e5c7SJunchao Zhang   const PetscMPIInt *ranks,*iranks;
321cd620004SJunchao Zhang   const PetscInt    *roffset,*rmine,*rremote,*ioffset,*irootloc;
322f659e5c7SJunchao Zhang   PetscBool         connected;
323f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
324f659e5c7SJunchao Zhang   PetscSF_Basic     *bas;
325f659e5c7SJunchao Zhang   PetscErrorCode    ierr;
326f659e5c7SJunchao Zhang 
327f659e5c7SJunchao Zhang   PetscFunctionBegin;
328f659e5c7SJunchao Zhang   ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr);
32920c24465SJunchao Zhang   ierr = PetscSFSetFromOptions(esf);CHKERRQ(ierr);
330f659e5c7SJunchao Zhang   ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */
331f659e5c7SJunchao Zhang 
332cd620004SJunchao Zhang   /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */
333f659e5c7SJunchao Zhang   ierr = PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);CHKERRQ(ierr);
334f659e5c7SJunchao Zhang   ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
335cd620004SJunchao Zhang   maxlocal = maxleaf - minleaf + 1;
336cd620004SJunchao Zhang   ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr);
337cd620004SJunchao Zhang   leafdata = leafmem - minleaf;
338f659e5c7SJunchao Zhang   /* Tag selected roots */
339f659e5c7SJunchao Zhang   for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1;
340f659e5c7SJunchao Zhang 
341ad227feaSJunchao Zhang   ierr = PetscSFBcastBegin(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
342ad227feaSJunchao Zhang   ierr = PetscSFBcastEnd(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
343f659e5c7SJunchao Zhang   ierr = PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr); /* Get send info */
344cd620004SJunchao Zhang   esf_nranks = esf_ndranks = esf_nleaves = 0;
345b23bfdefSJunchao Zhang   for (i=0; i<nranks; i++) {
346cd620004SJunchao Zhang     connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */
347cd620004SJunchao Zhang     for (j=roffset[i]; j<roffset[i+1]; j++) {if (leafdata[rmine[j]]) {esf_nleaves++; connected = PETSC_TRUE;}}
348f659e5c7SJunchao Zhang     if (connected) {esf_nranks++; if (i < ndranks) esf_ndranks++;}
349f659e5c7SJunchao Zhang   }
350f659e5c7SJunchao Zhang 
351f659e5c7SJunchao Zhang   /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
352cd620004SJunchao Zhang   ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr);
353cd620004SJunchao Zhang   ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr);
354cd620004SJunchao Zhang   ierr = PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,esf_nleaves,&esf_rmine,esf_nleaves,&esf_rremote);CHKERRQ(ierr);
355f659e5c7SJunchao Zhang   p    = 0; /* Counter for connected root ranks */
356f659e5c7SJunchao Zhang   q    = 0; /* Counter for connected leaves */
357f659e5c7SJunchao Zhang   esf_roffset[0] = 0;
358f659e5c7SJunchao Zhang   for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */
359f659e5c7SJunchao Zhang     connected = PETSC_FALSE;
360cd620004SJunchao Zhang     for (j=roffset[i]; j<roffset[i+1]; j++) {
361cd620004SJunchao Zhang       if (leafdata[rmine[j]]) {
362f659e5c7SJunchao Zhang         esf_rmine[q]         = new_ilocal[q] = rmine[j];
363f659e5c7SJunchao Zhang         esf_rremote[q]       = rremote[j];
364f659e5c7SJunchao Zhang         new_iremote[q].index = rremote[j];
365f659e5c7SJunchao Zhang         new_iremote[q].rank  = ranks[i];
366f659e5c7SJunchao Zhang         connected            = PETSC_TRUE;
367f659e5c7SJunchao Zhang         q++;
368f659e5c7SJunchao Zhang       }
369f659e5c7SJunchao Zhang     }
370f659e5c7SJunchao Zhang     if (connected) {
371f659e5c7SJunchao Zhang       esf_ranks[p]     = ranks[i];
372f659e5c7SJunchao Zhang       esf_roffset[p+1] = q;
373f659e5c7SJunchao Zhang       p++;
374f659e5c7SJunchao Zhang     }
375f659e5c7SJunchao Zhang   }
376f659e5c7SJunchao Zhang 
377f659e5c7SJunchao Zhang   /* SetGraph internally resets the SF, so we only set its fields after the call */
378cd620004SJunchao Zhang   ierr           = PetscSFSetGraph(esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
379f659e5c7SJunchao Zhang   esf->nranks    = esf_nranks;
380f659e5c7SJunchao Zhang   esf->ndranks   = esf_ndranks;
381f659e5c7SJunchao Zhang   esf->ranks     = esf_ranks;
382f659e5c7SJunchao Zhang   esf->roffset   = esf_roffset;
383f659e5c7SJunchao Zhang   esf->rmine     = esf_rmine;
384f659e5c7SJunchao Zhang   esf->rremote   = esf_rremote;
385cd620004SJunchao Zhang   esf->nleafreqs = esf_nranks - esf_ndranks;
386f659e5c7SJunchao Zhang 
387f659e5c7SJunchao Zhang   /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
388f659e5c7SJunchao Zhang   bas  = (PetscSF_Basic*)esf->data;
389f659e5c7SJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr); /* Get recv info */
390f659e5c7SJunchao Zhang   /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we
391cd620004SJunchao Zhang      we do not care since these arrays are usually short. The benefit is we can fill these arrays by just parsing irootloc once.
392f659e5c7SJunchao Zhang    */
393f659e5c7SJunchao Zhang   ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr);
394f659e5c7SJunchao Zhang   ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr);
395f659e5c7SJunchao Zhang   bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
396f659e5c7SJunchao Zhang   p = 0; /* Counter for connected leaf ranks */
397f659e5c7SJunchao Zhang   q = 0; /* Counter for connected roots */
398f659e5c7SJunchao Zhang   for (i=0; i<niranks; i++) {
399f659e5c7SJunchao Zhang     connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
400f659e5c7SJunchao Zhang     for (j=ioffset[i]; j<ioffset[i+1]; j++) {
401cd620004SJunchao Zhang       if (rootdata[irootloc[j]]) {
402f659e5c7SJunchao Zhang         bas->irootloc[q++] = irootloc[j];
403f659e5c7SJunchao Zhang         connected = PETSC_TRUE;
404f659e5c7SJunchao Zhang       }
405f659e5c7SJunchao Zhang     }
406f659e5c7SJunchao Zhang     if (connected) {
407f659e5c7SJunchao Zhang       bas->niranks++;
408f659e5c7SJunchao Zhang       if (i<ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */
409f659e5c7SJunchao Zhang       bas->iranks[p]    = iranks[i];
410f659e5c7SJunchao Zhang       bas->ioffset[p+1] = q;
411f659e5c7SJunchao Zhang       p++;
412f659e5c7SJunchao Zhang     }
413f659e5c7SJunchao Zhang   }
414f659e5c7SJunchao Zhang   bas->itotal     = q;
415cd620004SJunchao Zhang   bas->nrootreqs  = bas->niranks - bas->ndiranks;
416cd620004SJunchao Zhang   esf->persistent = PETSC_TRUE;
417cd620004SJunchao Zhang   /* Setup packing related fields */
418cd620004SJunchao Zhang   ierr = PetscSFSetUpPackFields(esf);CHKERRQ(ierr);
419f659e5c7SJunchao Zhang 
42020c24465SJunchao Zhang   /* Copy from PetscSFSetUp(), since this method wants to skip PetscSFSetUp(). */
42120c24465SJunchao Zhang #if defined(PETSC_HAVE_CUDA)
42220c24465SJunchao Zhang   if (esf->backend == PETSCSF_BACKEND_CUDA) {
42371438e86SJunchao Zhang     esf->ops->Malloc = PetscSFMalloc_CUDA;
42471438e86SJunchao Zhang     esf->ops->Free   = PetscSFFree_CUDA;
42520c24465SJunchao Zhang   }
42620c24465SJunchao Zhang #endif
42720c24465SJunchao Zhang 
42859af0bd3SScott Kruger #if defined(PETSC_HAVE_HIP)
42959af0bd3SScott Kruger   /* TODO: Needs debugging */
43059af0bd3SScott Kruger   if (esf->backend == PETSCSF_BACKEND_HIP) {
43159af0bd3SScott Kruger     esf->ops->Malloc = PetscSFMalloc_HIP;
43259af0bd3SScott Kruger     esf->ops->Free   = PetscSFFree_HIP;
43359af0bd3SScott Kruger   }
43459af0bd3SScott Kruger #endif
43559af0bd3SScott Kruger 
43620c24465SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS)
43720c24465SJunchao Zhang   if (esf->backend == PETSCSF_BACKEND_KOKKOS) {
43820c24465SJunchao Zhang     esf->ops->Malloc = PetscSFMalloc_Kokkos;
43920c24465SJunchao Zhang     esf->ops->Free   = PetscSFFree_Kokkos;
44020c24465SJunchao Zhang   }
44120c24465SJunchao Zhang #endif
442f659e5c7SJunchao Zhang   esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
443cd620004SJunchao Zhang   ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr);
444f659e5c7SJunchao Zhang   *newsf = esf;
445f659e5c7SJunchao Zhang   PetscFunctionReturn(0);
446f659e5c7SJunchao Zhang }
447f659e5c7SJunchao Zhang 
4488cc058d9SJed Brown PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
44995fce210SBarry Smith {
45040e23c03SJunchao Zhang   PetscSF_Basic  *dat;
45195fce210SBarry Smith   PetscErrorCode ierr;
45295fce210SBarry Smith 
45395fce210SBarry Smith   PetscFunctionBegin;
45495fce210SBarry Smith   sf->ops->SetUp                = PetscSFSetUp_Basic;
45595fce210SBarry Smith   sf->ops->Reset                = PetscSFReset_Basic;
45695fce210SBarry Smith   sf->ops->Destroy              = PetscSFDestroy_Basic;
45795fce210SBarry Smith   sf->ops->View                 = PetscSFView_Basic;
458ad227feaSJunchao Zhang   sf->ops->BcastBegin           = PetscSFBcastBegin_Basic;
459ad227feaSJunchao Zhang   sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
46095fce210SBarry Smith   sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
46195fce210SBarry Smith   sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
46295fce210SBarry Smith   sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
46395fce210SBarry Smith   sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
4648750ddebSJunchao Zhang   sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
46572502a1fSJunchao Zhang   sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;
46695fce210SBarry Smith 
46740e23c03SJunchao Zhang   ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr);
46840e23c03SJunchao Zhang   sf->data = (void*)dat;
46995fce210SBarry Smith   PetscFunctionReturn(0);
47095fce210SBarry Smith }
471