xref: /petsc/src/vec/is/sf/impls/basic/sfbasic.c (revision b23bfdefca792e3d9f47034521b8d4c437693123)
18cd53115SBarry Smith 
240e23c03SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfbasic.h>
395fce210SBarry Smith 
440e23c03SJunchao Zhang /*===================================================================================*/
540e23c03SJunchao Zhang /*              Internal routines for PetscSFPack_Basic                              */
640e23c03SJunchao Zhang /*===================================================================================*/
795fce210SBarry Smith 
840e23c03SJunchao Zhang /* Return root and leaf MPI requests for communication in the given direction. If the requests have not been
940e23c03SJunchao Zhang    initialized (since we use persistent requests), then initialize them.
1095fce210SBarry Smith */
11*b23bfdefSJunchao Zhang static PetscErrorCode PetscSFPackGetReqs_Basic(PetscSF sf,PetscSFPack_Basic link,PetscSFDirection direction,MPI_Request **rootreqs,MPI_Request **leafreqs)
1240e23c03SJunchao Zhang {
13*b23bfdefSJunchao Zhang   PetscErrorCode ierr;
1440e23c03SJunchao Zhang   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
1540e23c03SJunchao Zhang   MPI_Request    *requests = (direction == PETSCSF_LEAF2ROOT_REDUCE)? link->requests : link->requests + link->half;
16*b23bfdefSJunchao Zhang   PetscInt       i,j,nrootranks,ndrootranks,nleafranks,ndleafranks,disp;
1740e23c03SJunchao Zhang   const PetscInt *rootoffset,*leafoffset;
1840e23c03SJunchao Zhang   PetscMPIInt    n;
1940e23c03SJunchao Zhang   MPI_Comm       comm = PetscObjectComm((PetscObject)sf);
20*b23bfdefSJunchao Zhang   MPI_Datatype   unit;
2195fce210SBarry Smith 
2240e23c03SJunchao Zhang   PetscFunctionBegin;
2340e23c03SJunchao Zhang   if (!link->initialized[direction]) {
2440e23c03SJunchao Zhang     ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
2540e23c03SJunchao Zhang     ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr);
26*b23bfdefSJunchao Zhang     unit = link->unit;
27*b23bfdefSJunchao Zhang     j    = 0; /* request counter */
2840e23c03SJunchao Zhang     if (direction == PETSCSF_LEAF2ROOT_REDUCE) {
29*b23bfdefSJunchao Zhang       for (i=ndrootranks; i<nrootranks; i++) {
30*b23bfdefSJunchao Zhang         disp = (rootoffset[i] - rootoffset[ndrootranks])*link->unitbytes;
3140e23c03SJunchao Zhang         ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
32*b23bfdefSJunchao Zhang         ierr = MPI_Recv_init(link->rootbuf+disp,n,unit,bas->iranks[i],link->tag,comm,&requests[j++]);CHKERRQ(ierr);
3340e23c03SJunchao Zhang       }
34*b23bfdefSJunchao Zhang       for (i=ndleafranks; i<nleafranks; i++) {
35*b23bfdefSJunchao Zhang         disp = (leafoffset[i] - leafoffset[ndleafranks])*link->unitbytes;
3640e23c03SJunchao Zhang         ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
37*b23bfdefSJunchao Zhang         ierr = MPI_Send_init(link->leafbuf+disp,n,unit,sf->ranks[i],link->tag,comm,&requests[j++]);CHKERRQ(ierr);
3840e23c03SJunchao Zhang       }
3940e23c03SJunchao Zhang     } else if (direction == PETSCSF_ROOT2LEAF_BCAST) {
40*b23bfdefSJunchao Zhang       for (i=ndrootranks; i<nrootranks; i++) {
41*b23bfdefSJunchao Zhang         disp = (rootoffset[i] - rootoffset[ndrootranks])*link->unitbytes;
4240e23c03SJunchao Zhang         ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
43*b23bfdefSJunchao Zhang         ierr = MPI_Send_init(link->rootbuf+disp,n,unit,bas->iranks[i],link->tag,comm,&requests[j++]);CHKERRQ(ierr);
4440e23c03SJunchao Zhang       }
45*b23bfdefSJunchao Zhang       for (i=ndleafranks; i<nleafranks; i++) {
46*b23bfdefSJunchao Zhang         disp = (leafoffset[i] - leafoffset[ndleafranks])*link->unitbytes;
4740e23c03SJunchao Zhang         ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
48*b23bfdefSJunchao Zhang         ierr = MPI_Recv_init(link->leafbuf+disp,n,unit,sf->ranks[i],link->tag,comm,&requests[j++]);CHKERRQ(ierr);
4940e23c03SJunchao Zhang       }
5040e23c03SJunchao Zhang     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Out-of-range PetscSFDirection = %D\n",direction);
5140e23c03SJunchao Zhang 
5240e23c03SJunchao Zhang     link->initialized[direction] = PETSC_TRUE;
5395fce210SBarry Smith   }
5495fce210SBarry Smith 
5540e23c03SJunchao Zhang   if (rootreqs) *rootreqs = requests;
56*b23bfdefSJunchao Zhang   if (leafreqs) *leafreqs = requests + (bas->niranks - bas->ndiranks);
5740e23c03SJunchao Zhang   PetscFunctionReturn(0);
5895fce210SBarry Smith }
5995fce210SBarry Smith 
60*b23bfdefSJunchao Zhang /* Common part shared by SFBasic and SFNeighbor based on the fact they all deal with sparse graphs. */
61*b23bfdefSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackGet_Basic_Common(PetscSF sf,MPI_Datatype unit,const void *rootdata,const void *leafdata,PetscInt half,PetscSFPack_Basic *mylink)
6240e23c03SJunchao Zhang {
6340e23c03SJunchao Zhang   PetscErrorCode    ierr;
64*b23bfdefSJunchao Zhang   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
65*b23bfdefSJunchao Zhang   PetscInt          i,nrootranks,ndrootranks,nleafranks,ndleafranks,rbufsize,lbufsize,sbufsize;
6640e23c03SJunchao Zhang   const PetscInt    *rootoffset,*leafoffset;
6740e23c03SJunchao Zhang   PetscSFPack       *p;
6840e23c03SJunchao Zhang   PetscSFPack_Basic link;
69*b23bfdefSJunchao Zhang   PetscBool         match;
7040e23c03SJunchao Zhang 
7140e23c03SJunchao Zhang   PetscFunctionBegin;
72*b23bfdefSJunchao Zhang   ierr = PetscSFPackSetErrorOnUnsupportedOverlap(sf,unit,rootdata,leafdata);CHKERRQ(ierr);
739d1c8addSJunchao Zhang 
7440e23c03SJunchao Zhang   /* Look for types in cache */
7540e23c03SJunchao Zhang   for (p=&bas->avail; (link=(PetscSFPack_Basic)*p); p=&link->next) {
7640e23c03SJunchao Zhang     ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr);
7740e23c03SJunchao Zhang     if (match) {
7840e23c03SJunchao Zhang       *p = link->next; /* Remove from available list */
7940e23c03SJunchao Zhang       goto found;
8040e23c03SJunchao Zhang     }
8153deab39SPeter Brune   }
8253deab39SPeter Brune 
8340e23c03SJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
8440e23c03SJunchao Zhang   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr);
8540e23c03SJunchao Zhang   ierr = PetscNew(&link);CHKERRQ(ierr);
8640e23c03SJunchao Zhang   ierr = PetscSFPackSetupType((PetscSFPack)link,unit);CHKERRQ(ierr);
87*b23bfdefSJunchao Zhang   ierr = PetscCommGetNewTag(PetscObjectComm((PetscObject)sf),&link->tag);CHKERRQ(ierr); /* One tag per link */
8853deab39SPeter Brune 
89*b23bfdefSJunchao Zhang   /* Allocate root, leaf, self buffers, and MPI requests. We double the requests since there are leaf2root and root2leaf communications. */
90*b23bfdefSJunchao Zhang   rbufsize   = (rootoffset[nrootranks]-rootoffset[ndrootranks])*link->unitbytes;
91*b23bfdefSJunchao Zhang   lbufsize   = (leafoffset[nleafranks]-leafoffset[ndleafranks])*link->unitbytes;
92*b23bfdefSJunchao Zhang   sbufsize   = rootoffset[ndrootranks]*link->unitbytes;
93*b23bfdefSJunchao Zhang   link->half = half;
94*b23bfdefSJunchao Zhang   ierr       = PetscMalloc4(rbufsize,&link->rootbuf,lbufsize,&link->leafbuf,sbufsize,&link->selfbuf,link->half*2,&link->requests);CHKERRQ(ierr);
95*b23bfdefSJunchao Zhang   for (i=0; i<link->half*2; i++) link->requests[i] = MPI_REQUEST_NULL; /* Initialized to NULL so that we know which need to be freed in Destroy */
9653deab39SPeter Brune 
9740e23c03SJunchao Zhang found:
98*b23bfdefSJunchao Zhang   link->rkey = rootdata;
99*b23bfdefSJunchao Zhang   link->lkey = leafdata;
10040e23c03SJunchao Zhang   link->next = bas->inuse;
10140e23c03SJunchao Zhang   bas->inuse = (PetscSFPack)link;
10240e23c03SJunchao Zhang 
10340e23c03SJunchao Zhang   *mylink    = link;
10440e23c03SJunchao Zhang   PetscFunctionReturn(0);
10595fce210SBarry Smith }
10695fce210SBarry Smith 
107*b23bfdefSJunchao Zhang static PetscErrorCode PetscSFPackGet_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,const void *leafdata,PetscSFDirection direction,PetscSFPack_Basic *mylink)
108*b23bfdefSJunchao Zhang {
109*b23bfdefSJunchao Zhang   PetscErrorCode    ierr;
110*b23bfdefSJunchao Zhang   PetscInt          nrootranks,ndrootranks,nleafranks,ndleafranks,half;
111*b23bfdefSJunchao Zhang 
112*b23bfdefSJunchao Zhang   PetscFunctionBegin;
113*b23bfdefSJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,NULL,NULL);CHKERRQ(ierr);
114*b23bfdefSJunchao Zhang   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,NULL,NULL,NULL);CHKERRQ(ierr);
115*b23bfdefSJunchao Zhang   half = (nrootranks - ndrootranks) + (nleafranks - ndleafranks);
116*b23bfdefSJunchao Zhang   ierr = PetscSFPackGet_Basic_Common(sf,unit,rootdata,leafdata,half,mylink);CHKERRQ(ierr);
117*b23bfdefSJunchao Zhang   PetscFunctionReturn(0);
118*b23bfdefSJunchao Zhang }
119*b23bfdefSJunchao Zhang 
12040e23c03SJunchao Zhang /*===================================================================================*/
12140e23c03SJunchao Zhang /*              SF public interface implementations                                  */
12240e23c03SJunchao Zhang /*===================================================================================*/
12340e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
12495fce210SBarry Smith {
12595fce210SBarry Smith   PetscErrorCode ierr;
126*b23bfdefSJunchao Zhang   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
12795fce210SBarry Smith   PetscInt       *rlengths,*ilengths,i;
12840e23c03SJunchao Zhang   PetscMPIInt    rank,niranks,*iranks,tag;
12995fce210SBarry Smith   MPI_Comm       comm;
130b5a8e515SJed Brown   MPI_Group      group;
13140e23c03SJunchao Zhang   MPI_Request    *rootreqs,*leafreqs;
13295fce210SBarry Smith 
13395fce210SBarry Smith   PetscFunctionBegin;
134b5a8e515SJed Brown   ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr);
135b5a8e515SJed Brown   ierr = PetscSFSetUpRanks(sf,group);CHKERRQ(ierr);
136b5a8e515SJed Brown   ierr = MPI_Group_free(&group);CHKERRQ(ierr);
13795fce210SBarry Smith   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
13840e23c03SJunchao Zhang   ierr = PetscObjectGetNewTag((PetscObject)sf,&tag);CHKERRQ(ierr);
139c943f53fSJed Brown   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
14095fce210SBarry Smith   /*
14195fce210SBarry Smith    * Inform roots about how many leaves and from which ranks
14295fce210SBarry Smith    */
143785e854fSJed Brown   ierr = PetscMalloc1(sf->nranks,&rlengths);CHKERRQ(ierr);
14495fce210SBarry Smith   /* Determine number, sending ranks, and length of incoming */
14595fce210SBarry Smith   for (i=0; i<sf->nranks; i++) {
14695fce210SBarry Smith     rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */
14795fce210SBarry Smith   }
14840e23c03SJunchao Zhang   ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);CHKERRQ(ierr);
149c943f53fSJed Brown 
1500b899082SJunchao Zhang   /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why.
1510b899082SJunchao Zhang      We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is
1520b899082SJunchao Zhang      small and the sorting is cheap.
1530b899082SJunchao Zhang    */
1540b899082SJunchao Zhang   ierr = PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);CHKERRQ(ierr);
1550b899082SJunchao Zhang 
156c943f53fSJed Brown   /* Partition into distinguished and non-distinguished incoming ranks */
157c943f53fSJed Brown   bas->ndiranks = sf->ndranks;
158c943f53fSJed Brown   bas->niranks = bas->ndiranks + niranks;
159c943f53fSJed Brown   ierr = PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);CHKERRQ(ierr);
160c943f53fSJed Brown   bas->ioffset[0] = 0;
161c943f53fSJed Brown   for (i=0; i<bas->ndiranks; i++) {
162c943f53fSJed Brown     bas->iranks[i] = sf->ranks[i];
163c943f53fSJed Brown     bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i];
164c943f53fSJed Brown   }
16540e23c03SJunchao Zhang   if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks");
16640e23c03SJunchao Zhang   for ( ; i<bas->niranks; i++) {
167c943f53fSJed Brown     bas->iranks[i] = iranks[i-bas->ndiranks];
168c943f53fSJed Brown     bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks];
169c943f53fSJed Brown   }
170c943f53fSJed Brown   bas->itotal = bas->ioffset[i];
17195fce210SBarry Smith   ierr = PetscFree(rlengths);CHKERRQ(ierr);
172c943f53fSJed Brown   ierr = PetscFree(iranks);CHKERRQ(ierr);
173c943f53fSJed Brown   ierr = PetscFree(ilengths);CHKERRQ(ierr);
17495fce210SBarry Smith 
17595fce210SBarry Smith   /* Send leaf identities to roots */
176c943f53fSJed Brown   ierr = PetscMalloc1(bas->itotal,&bas->irootloc);CHKERRQ(ierr);
17740e23c03SJunchao Zhang   ierr = PetscMalloc2(bas->niranks-bas->ndiranks,&rootreqs,sf->nranks-sf->ndranks,&leafreqs);CHKERRQ(ierr);
17840e23c03SJunchao Zhang   for (i=bas->ndiranks; i<bas->niranks; i++) {
17940e23c03SJunchao Zhang     ierr = MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);CHKERRQ(ierr);
18040e23c03SJunchao Zhang   }
18140e23c03SJunchao Zhang   for (i=0; i<sf->nranks; i++) {
18295fce210SBarry Smith     PetscMPIInt npoints;
18395fce210SBarry Smith     ierr = PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);CHKERRQ(ierr);
18440e23c03SJunchao Zhang     if (i < sf->ndranks) {
18540e23c03SJunchao Zhang       if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank");
18640e23c03SJunchao Zhang       if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank");
18740e23c03SJunchao Zhang       if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths");
18840e23c03SJunchao Zhang       ierr = PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);CHKERRQ(ierr);
189c943f53fSJed Brown       continue;
190c943f53fSJed Brown     }
19140e23c03SJunchao Zhang     ierr = MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);CHKERRQ(ierr);
192bf39f1bfSJed Brown   }
19340e23c03SJunchao Zhang   ierr = MPI_Waitall(bas->niranks-bas->ndiranks,rootreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
19440e23c03SJunchao Zhang   ierr = MPI_Waitall(sf->nranks-sf->ndranks,leafreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
19540e23c03SJunchao Zhang   ierr = PetscFree2(rootreqs,leafreqs);CHKERRQ(ierr);
19695fce210SBarry Smith 
197*b23bfdefSJunchao Zhang   /* Setup packing optimization for roots and leaves */
198*b23bfdefSJunchao Zhang   ierr = PetscSFPackSetupOptimization_Basic(sf);CHKERRQ(ierr);
19995fce210SBarry Smith   PetscFunctionReturn(0);
20095fce210SBarry Smith }
20195fce210SBarry Smith 
2024416b707SBarry Smith static PetscErrorCode PetscSFSetFromOptions_Basic(PetscOptionItems *PetscOptionsObject,PetscSF sf)
20395fce210SBarry Smith {
20495fce210SBarry Smith   PetscErrorCode ierr;
20595fce210SBarry Smith 
20695fce210SBarry Smith   PetscFunctionBegin;
207e55864a3SBarry Smith   ierr = PetscOptionsHead(PetscOptionsObject,"PetscSF Basic options");CHKERRQ(ierr);
20895fce210SBarry Smith   ierr = PetscOptionsTail();CHKERRQ(ierr);
20995fce210SBarry Smith   PetscFunctionReturn(0);
21095fce210SBarry Smith }
21195fce210SBarry Smith 
21240e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
21395fce210SBarry Smith {
21495fce210SBarry Smith   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
21595fce210SBarry Smith   PetscErrorCode    ierr;
21640e23c03SJunchao Zhang   PetscSFPack_Basic link,next;
217*b23bfdefSJunchao Zhang   PetscInt          i;
21895fce210SBarry Smith 
21995fce210SBarry Smith   PetscFunctionBegin;
22029046d53SLisandro Dalcin   if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
221c943f53fSJed Brown   ierr = PetscFree2(bas->iranks,bas->ioffset);CHKERRQ(ierr);
222c943f53fSJed Brown   ierr = PetscFree(bas->irootloc);CHKERRQ(ierr);
22340e23c03SJunchao Zhang   for (link=(PetscSFPack_Basic)bas->avail; link; link=next) {
22440e23c03SJunchao Zhang     next = (PetscSFPack_Basic)link->next;
22577b7e48dSJunchao Zhang     if (!link->isbuiltin) {ierr = MPI_Type_free(&link->unit);CHKERRQ(ierr);}
226*b23bfdefSJunchao Zhang     for (i=0; i<link->half*2; i++) { /* Persistent reqs must be freed. */
22740e23c03SJunchao Zhang       if (link->requests[i] != MPI_REQUEST_NULL) {ierr = MPI_Request_free(&link->requests[i]);CHKERRQ(ierr);}
22830e38525SJunchao Zhang     }
229*b23bfdefSJunchao Zhang     ierr = PetscFree4(link->rootbuf,link->leafbuf,link->selfbuf,link->requests);CHKERRQ(ierr);
23095fce210SBarry Smith     ierr = PetscFree(link);CHKERRQ(ierr);
23195fce210SBarry Smith   }
23295fce210SBarry Smith   bas->avail = NULL;
233*b23bfdefSJunchao Zhang   ierr = PetscSFPackDestroyOptimization_Basic(sf);CHKERRQ(ierr);
23495fce210SBarry Smith   PetscFunctionReturn(0);
23595fce210SBarry Smith }
23695fce210SBarry Smith 
23740e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
23895fce210SBarry Smith {
23995fce210SBarry Smith   PetscErrorCode ierr;
24095fce210SBarry Smith 
24195fce210SBarry Smith   PetscFunctionBegin;
24240e23c03SJunchao Zhang   ierr = PetscSFReset(sf);CHKERRQ(ierr);
24395fce210SBarry Smith   ierr = PetscFree(sf->data);CHKERRQ(ierr);
24495fce210SBarry Smith   PetscFunctionReturn(0);
24595fce210SBarry Smith }
24695fce210SBarry Smith 
24740e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer)
24895fce210SBarry Smith {
24995fce210SBarry Smith   PetscErrorCode ierr;
25095fce210SBarry Smith   PetscBool      iascii;
25195fce210SBarry Smith 
25295fce210SBarry Smith   PetscFunctionBegin;
25395fce210SBarry Smith   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
254*b23bfdefSJunchao Zhang   if (iascii) {ierr = PetscViewerASCIIPrintf(viewer,"  sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);}
25595fce210SBarry Smith   PetscFunctionReturn(0);
25695fce210SBarry Smith }
25795fce210SBarry Smith 
2583482bfa8SJunchao Zhang static PetscErrorCode PetscSFBcastAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
25995fce210SBarry Smith {
26095fce210SBarry Smith   PetscErrorCode    ierr;
26140e23c03SJunchao Zhang   PetscSFPack_Basic link;
262*b23bfdefSJunchao Zhang   PetscInt          nrootranks,ndrootranks,nleafranks,ndleafranks,cnt;
26395fce210SBarry Smith   const PetscInt    *rootoffset,*leafoffset,*rootloc,*leafloc;
26495fce210SBarry Smith   const PetscMPIInt *rootranks,*leafranks;
26595fce210SBarry Smith   MPI_Request       *rootreqs,*leafreqs;
26695fce210SBarry Smith 
26795fce210SBarry Smith   PetscFunctionBegin;
268*b23bfdefSJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,NULL);CHKERRQ(ierr);
26940e23c03SJunchao Zhang   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr);
2709d1c8addSJunchao Zhang   ierr = PetscSFPackGet_Basic(sf,unit,rootdata,leafdata,PETSCSF_ROOT2LEAF_BCAST,&link);CHKERRQ(ierr);
271*b23bfdefSJunchao Zhang   ierr = PetscSFGetRootIndicesAtPlace_Basic(sf,PETSC_FALSE,&rootloc);
27295fce210SBarry Smith 
273*b23bfdefSJunchao Zhang   ierr = PetscSFPackGetReqs_Basic(sf,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr);
274*b23bfdefSJunchao Zhang   /* Post Irecv. Note distinguished ranks receive data via shared memory (i.e., not via MPI) */
275*b23bfdefSJunchao Zhang   cnt  = leafoffset[nleafranks]-leafoffset[ndleafranks];
276*b23bfdefSJunchao Zhang   ierr = MPI_Startall_irecv(cnt,unit,nleafranks-ndleafranks,leafreqs);CHKERRQ(ierr);
27730e38525SJunchao Zhang 
278*b23bfdefSJunchao Zhang   /* Do Isend */
279*b23bfdefSJunchao Zhang   ierr = PetscSFPackRootData(sf,(PetscSFPack)link,nrootranks,ndrootranks,rootoffset,rootloc,rootdata);CHKERRQ(ierr);
280*b23bfdefSJunchao Zhang   cnt = rootoffset[nrootranks] - rootoffset[ndrootranks];
281*b23bfdefSJunchao Zhang   ierr = MPI_Startall_isend(cnt,unit,nrootranks-ndrootranks,rootreqs);CHKERRQ(ierr);
28295fce210SBarry Smith   PetscFunctionReturn(0);
28395fce210SBarry Smith }
28495fce210SBarry Smith 
28540e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
28695fce210SBarry Smith {
28795fce210SBarry Smith   PetscErrorCode    ierr;
28840e23c03SJunchao Zhang   PetscSFPack_Basic link;
289*b23bfdefSJunchao Zhang   PetscInt          nleafranks,ndleafranks;
29095fce210SBarry Smith   const PetscInt    *leafoffset,*leafloc;
29195fce210SBarry Smith 
29295fce210SBarry Smith   PetscFunctionBegin;
2939d1c8addSJunchao Zhang   ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
29440e23c03SJunchao Zhang   ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
295*b23bfdefSJunchao Zhang   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr);
296*b23bfdefSJunchao Zhang   ierr = PetscSFGetLeafIndicesAtPlace_Basic(sf,PETSC_FALSE,&leafloc);
297*b23bfdefSJunchao Zhang   ierr = PetscSFUnpackAndOpLeafData(sf,(PetscSFPack)link,nleafranks,ndleafranks,leafoffset,leafloc,leafdata,op);CHKERRQ(ierr);
29840e23c03SJunchao Zhang   ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
29995fce210SBarry Smith   PetscFunctionReturn(0);
30095fce210SBarry Smith }
30195fce210SBarry Smith 
30295fce210SBarry Smith /* leaf -> root with reduction */
30340e23c03SJunchao Zhang static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
30495fce210SBarry Smith {
30540e23c03SJunchao Zhang   PetscSFPack_Basic link;
30695fce210SBarry Smith   PetscErrorCode    ierr;
307*b23bfdefSJunchao Zhang   PetscInt          nrootranks,ndrootranks,nleafranks,ndleafranks,cnt;
30895fce210SBarry Smith   const PetscInt    *rootoffset,*leafoffset,*rootloc,*leafloc;
30995fce210SBarry Smith   const PetscMPIInt *rootranks,*leafranks;
31095fce210SBarry Smith   MPI_Request       *rootreqs,*leafreqs;
31195fce210SBarry Smith 
31295fce210SBarry Smith   PetscFunctionBegin;
31340e23c03SJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr);
314*b23bfdefSJunchao Zhang   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,NULL,NULL);CHKERRQ(ierr);
315*b23bfdefSJunchao Zhang   ierr = PetscSFGetLeafIndicesAtPlace_Basic(sf,PETSC_FALSE,&leafloc);
31695fce210SBarry Smith 
317*b23bfdefSJunchao Zhang   ierr = PetscSFPackGet_Basic(sf,unit,rootdata,leafdata,PETSCSF_LEAF2ROOT_REDUCE,&link);CHKERRQ(ierr);
318*b23bfdefSJunchao Zhang   ierr = PetscSFPackGetReqs_Basic(sf,link,PETSCSF_LEAF2ROOT_REDUCE,&rootreqs,&leafreqs);CHKERRQ(ierr);
319c943f53fSJed Brown   /* Eagerly post root receives for non-distinguished ranks */
320*b23bfdefSJunchao Zhang   cnt  = rootoffset[nrootranks]-rootoffset[ndrootranks];
321*b23bfdefSJunchao Zhang   ierr = MPI_Startall_irecv(cnt,unit,nrootranks-ndrootranks,rootreqs);CHKERRQ(ierr);
32230e38525SJunchao Zhang 
32395fce210SBarry Smith   /* Pack and send leaf data */
324*b23bfdefSJunchao Zhang   ierr = PetscSFPackLeafData(sf,(PetscSFPack)link,nleafranks,ndleafranks,leafoffset,leafloc,leafdata);CHKERRQ(ierr);
325*b23bfdefSJunchao Zhang   cnt  = leafoffset[nleafranks] - leafoffset[ndleafranks];
326*b23bfdefSJunchao Zhang   ierr = MPI_Startall_isend(cnt,unit,nleafranks-ndleafranks,leafreqs);CHKERRQ(ierr);
32795fce210SBarry Smith   PetscFunctionReturn(0);
32895fce210SBarry Smith }
32995fce210SBarry Smith 
33040e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
33195fce210SBarry Smith {
33295fce210SBarry Smith   PetscErrorCode    ierr;
33340e23c03SJunchao Zhang   PetscSFPack_Basic link;
334*b23bfdefSJunchao Zhang   PetscInt          nrootranks,ndrootranks;
33595fce210SBarry Smith   const PetscInt    *rootoffset,*rootloc;
33695fce210SBarry Smith 
33795fce210SBarry Smith   PetscFunctionBegin;
338*b23bfdefSJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
339*b23bfdefSJunchao Zhang   ierr = PetscSFGetRootIndicesAtPlace_Basic(sf,PETSC_FALSE,&rootloc);
3409d1c8addSJunchao Zhang   ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
34140e23c03SJunchao Zhang   ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);
342*b23bfdefSJunchao Zhang   ierr = PetscSFUnpackAndOpRootData(sf,(PetscSFPack)link,nrootranks,ndrootranks,rootoffset,rootloc,rootdata,op);CHKERRQ(ierr);
34340e23c03SJunchao Zhang   ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
34495fce210SBarry Smith   PetscFunctionReturn(0);
34595fce210SBarry Smith }
34695fce210SBarry Smith 
34740e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
34895fce210SBarry Smith {
34995fce210SBarry Smith   PetscErrorCode ierr;
35095fce210SBarry Smith 
35195fce210SBarry Smith   PetscFunctionBegin;
35240e23c03SJunchao Zhang   ierr = PetscSFReduceBegin(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
35395fce210SBarry Smith   PetscFunctionReturn(0);
35495fce210SBarry Smith }
35595fce210SBarry Smith 
35695fce210SBarry Smith static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
35795fce210SBarry Smith {
35895fce210SBarry Smith   PetscErrorCode    ierr;
35940e23c03SJunchao Zhang   PetscSFPack_Basic link;
360*b23bfdefSJunchao Zhang   PetscInt          nrootranks,ndrootranks,nleafranks,ndleafranks,cnt;
36195fce210SBarry Smith   const PetscInt    *rootoffset,*leafoffset,*rootloc,*leafloc;
36295fce210SBarry Smith   const PetscMPIInt *rootranks,*leafranks;
36395fce210SBarry Smith   MPI_Request       *rootreqs,*leafreqs;
36495fce210SBarry Smith 
36595fce210SBarry Smith   PetscFunctionBegin;
366*b23bfdefSJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,NULL);CHKERRQ(ierr);
367*b23bfdefSJunchao Zhang   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,NULL,NULL);CHKERRQ(ierr);
368*b23bfdefSJunchao Zhang   ierr = PetscSFGetRootIndicesAtPlace_Basic(sf,PETSC_FALSE,&rootloc);CHKERRQ(ierr);
369*b23bfdefSJunchao Zhang   ierr = PetscSFGetLeafIndicesAtPlace_Basic(sf,PETSC_FALSE,&leafloc);CHKERRQ(ierr);
3709d1c8addSJunchao Zhang   ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
37195fce210SBarry Smith   /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
37240e23c03SJunchao Zhang   ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);
373*b23bfdefSJunchao Zhang   ierr = PetscSFPackGetReqs_Basic(sf,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr);
37440e23c03SJunchao Zhang 
37595fce210SBarry Smith   /* Post leaf receives */
376*b23bfdefSJunchao Zhang   cnt  = leafoffset[nleafranks]-leafoffset[ndleafranks];
377*b23bfdefSJunchao Zhang   ierr = MPI_Startall_irecv(cnt,unit,nleafranks-ndleafranks,leafreqs);CHKERRQ(ierr);
37830e38525SJunchao Zhang 
37995fce210SBarry Smith   /* Process local fetch-and-op, post root sends */
380*b23bfdefSJunchao Zhang   ierr = PetscSFFetchAndOpRootData(sf,(PetscSFPack)link,nrootranks,ndrootranks,rootoffset,rootloc,rootdata,op);CHKERRQ(ierr);
381*b23bfdefSJunchao Zhang   cnt  = rootoffset[nrootranks] - rootoffset[ndrootranks];
382*b23bfdefSJunchao Zhang   ierr = MPI_Startall_isend(cnt,unit,nrootranks-ndrootranks,rootreqs);CHKERRQ(ierr);
383*b23bfdefSJunchao Zhang 
384*b23bfdefSJunchao Zhang   /* Unpack and insert fetched data into leaves */
38540e23c03SJunchao Zhang   ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
386*b23bfdefSJunchao Zhang   ierr = PetscSFUnpackAndOpLeafData(sf,(PetscSFPack)link,nleafranks,ndleafranks,leafoffset,leafloc,leafupdate,MPIU_REPLACE);CHKERRQ(ierr);
38740e23c03SJunchao Zhang   ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
38895fce210SBarry Smith   PetscFunctionReturn(0);
38995fce210SBarry Smith }
39095fce210SBarry Smith 
39140e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
3928750ddebSJunchao Zhang {
3938750ddebSJunchao Zhang   PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
3948750ddebSJunchao Zhang 
3958750ddebSJunchao Zhang   PetscFunctionBegin;
3968750ddebSJunchao Zhang   if (niranks)  *niranks  = bas->niranks;
3978750ddebSJunchao Zhang   if (iranks)   *iranks   = bas->iranks;
3988750ddebSJunchao Zhang   if (ioffset)  *ioffset  = bas->ioffset;
3998750ddebSJunchao Zhang   if (irootloc) *irootloc = bas->irootloc;
4008750ddebSJunchao Zhang   PetscFunctionReturn(0);
4018750ddebSJunchao Zhang }
4028750ddebSJunchao Zhang 
403f659e5c7SJunchao Zhang /* An optimized PetscSFCreateEmbeddedSF. We aggresively make use of the established communication on sf.
404f659e5c7SJunchao Zhang    We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[]
405f659e5c7SJunchao Zhang    was sorted before calling the routine.
406f659e5c7SJunchao Zhang  */
407f659e5c7SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
408f659e5c7SJunchao Zhang {
409f659e5c7SJunchao Zhang   PetscSF           esf;
410*b23bfdefSJunchao Zhang   PetscInt          esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote,count;
411*b23bfdefSJunchao Zhang   PetscInt          i,j,k,p,q,nroots,*rootdata,*leafdata,connected_leaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal;
412f659e5c7SJunchao Zhang   PetscMPIInt       *esf_ranks;
413f659e5c7SJunchao Zhang   const PetscMPIInt *ranks,*iranks;
414*b23bfdefSJunchao Zhang   const PetscInt    *roffset,*rmine,*rremote,*ioffset,*irootloc,*buffer;
415f659e5c7SJunchao Zhang   PetscBool         connected;
416f659e5c7SJunchao Zhang   PetscSFPack_Basic link;
417f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
418f659e5c7SJunchao Zhang   PetscSF_Basic     *bas;
419f659e5c7SJunchao Zhang   PetscErrorCode    ierr;
420f659e5c7SJunchao Zhang 
421f659e5c7SJunchao Zhang   PetscFunctionBegin;
422f659e5c7SJunchao Zhang   ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr);
423f659e5c7SJunchao Zhang   ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */
424f659e5c7SJunchao Zhang 
425f659e5c7SJunchao Zhang   /* Find out which leaves are still connected to roots in the embedded sf */
426f659e5c7SJunchao Zhang   ierr = PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);CHKERRQ(ierr);
427f659e5c7SJunchao Zhang   ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
428f659e5c7SJunchao Zhang   /* We abused the term leafdata here, whose size is usually the number of leaf data items. Here its size is # of leaves (always >= # of leaf data items) */
429f659e5c7SJunchao Zhang   maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */
430f659e5c7SJunchao Zhang   ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);CHKERRQ(ierr);
431f659e5c7SJunchao Zhang   /* Tag selected roots */
432f659e5c7SJunchao Zhang   for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1;
433f659e5c7SJunchao Zhang 
434f659e5c7SJunchao Zhang   /* Bcast from roots to leaves to tag connected leaves. We reuse the established bcast communication in
435f659e5c7SJunchao Zhang      sf but do not do unpacking (from leaf buffer to leafdata). The raw data in leaf buffer is what we are
436f659e5c7SJunchao Zhang      interested in since it tells which leaves are connected to which ranks.
437f659e5c7SJunchao Zhang    */
438f659e5c7SJunchao Zhang   ierr = PetscSFBcastAndOpBegin_Basic(sf,MPIU_INT,rootdata,leafdata-minleaf,MPIU_REPLACE);CHKERRQ(ierr); /* Need to give leafdata but we won't use it */
4399d1c8addSJunchao Zhang   ierr = PetscSFPackGetInUse(sf,MPIU_INT,rootdata,leafdata-minleaf,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
440f659e5c7SJunchao Zhang   ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
441f659e5c7SJunchao Zhang   ierr = PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr); /* Get send info */
442f659e5c7SJunchao Zhang   esf_nranks = esf_ndranks = connected_leaves = 0;
443*b23bfdefSJunchao Zhang   for (i=0; i<nranks; i++) {
444f659e5c7SJunchao Zhang     connected = PETSC_FALSE; /* Is the current process still connected to this remote root rank? */
445*b23bfdefSJunchao Zhang     buffer    = i < ndranks? (PetscInt*)link->selfbuf : (PetscInt*)link->leafbuf + (roffset[i] - roffset[ndranks]);
446*b23bfdefSJunchao Zhang     count     = roffset[i+1] - roffset[i];
447*b23bfdefSJunchao Zhang     for (j=0; j<count; j++) {if (buffer[j]) {connected_leaves++; connected = PETSC_TRUE;}}
448f659e5c7SJunchao Zhang     if (connected) {esf_nranks++; if (i < ndranks) esf_ndranks++;}
449f659e5c7SJunchao Zhang   }
450f659e5c7SJunchao Zhang 
451f659e5c7SJunchao Zhang   /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
452f659e5c7SJunchao Zhang   ierr = PetscMalloc1(connected_leaves,&new_ilocal);CHKERRQ(ierr);
453f659e5c7SJunchao Zhang   ierr = PetscMalloc1(connected_leaves,&new_iremote);CHKERRQ(ierr);
454f659e5c7SJunchao Zhang   ierr = PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,connected_leaves,&esf_rmine,connected_leaves,&esf_rremote);CHKERRQ(ierr);
455f659e5c7SJunchao Zhang   p    = 0; /* Counter for connected root ranks */
456f659e5c7SJunchao Zhang   q    = 0; /* Counter for connected leaves */
457f659e5c7SJunchao Zhang   esf_roffset[0] = 0;
458f659e5c7SJunchao Zhang   for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */
459*b23bfdefSJunchao Zhang     buffer    = i < ndranks? (PetscInt*)link->selfbuf : (PetscInt*)link->leafbuf + (roffset[i] - roffset[ndranks]);
460f659e5c7SJunchao Zhang     connected = PETSC_FALSE;
461f659e5c7SJunchao Zhang     for (j=roffset[i],k=0; j<roffset[i+1]; j++,k++) {
462*b23bfdefSJunchao Zhang         if (buffer[k]) {
463f659e5c7SJunchao Zhang         esf_rmine[q]         = new_ilocal[q] = rmine[j];
464f659e5c7SJunchao Zhang         esf_rremote[q]       = rremote[j];
465f659e5c7SJunchao Zhang         new_iremote[q].index = rremote[j];
466f659e5c7SJunchao Zhang         new_iremote[q].rank  = ranks[i];
467f659e5c7SJunchao Zhang         connected            = PETSC_TRUE;
468f659e5c7SJunchao Zhang         q++;
469f659e5c7SJunchao Zhang       }
470f659e5c7SJunchao Zhang     }
471f659e5c7SJunchao Zhang     if (connected) {
472f659e5c7SJunchao Zhang       esf_ranks[p]     = ranks[i];
473f659e5c7SJunchao Zhang       esf_roffset[p+1] = q;
474f659e5c7SJunchao Zhang       p++;
475f659e5c7SJunchao Zhang     }
476f659e5c7SJunchao Zhang   }
477f659e5c7SJunchao Zhang 
478f659e5c7SJunchao Zhang   ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
479f659e5c7SJunchao Zhang 
480f659e5c7SJunchao Zhang   /* SetGraph internally resets the SF, so we only set its fields after the call */
481f659e5c7SJunchao Zhang   ierr         = PetscSFSetGraph(esf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
482f659e5c7SJunchao Zhang   esf->nranks  = esf_nranks;
483f659e5c7SJunchao Zhang   esf->ndranks = esf_ndranks;
484f659e5c7SJunchao Zhang   esf->ranks   = esf_ranks;
485f659e5c7SJunchao Zhang   esf->roffset = esf_roffset;
486f659e5c7SJunchao Zhang   esf->rmine   = esf_rmine;
487f659e5c7SJunchao Zhang   esf->rremote = esf_rremote;
488f659e5c7SJunchao Zhang 
489f659e5c7SJunchao Zhang   /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
490f659e5c7SJunchao Zhang   bas  = (PetscSF_Basic*)esf->data;
491f659e5c7SJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr); /* Get recv info */
492f659e5c7SJunchao Zhang   /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we
493f659e5c7SJunchao Zhang      expect these arrays are usually short, so we do not care. The benefit is we can fill these arrays by just parsing irootloc once.
494f659e5c7SJunchao Zhang    */
495f659e5c7SJunchao Zhang   ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr);
496f659e5c7SJunchao Zhang   ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr);
497f659e5c7SJunchao Zhang   bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
498f659e5c7SJunchao Zhang   p = 0; /* Counter for connected leaf ranks */
499f659e5c7SJunchao Zhang   q = 0; /* Counter for connected roots */
500f659e5c7SJunchao Zhang   for (i=0; i<niranks; i++) {
501f659e5c7SJunchao Zhang     connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
502f659e5c7SJunchao Zhang     for (j=ioffset[i]; j<ioffset[i+1]; j++) {
503f659e5c7SJunchao Zhang       PetscInt loc;
504f659e5c7SJunchao Zhang       ierr = PetscFindInt(irootloc[j],nselected,selected,&loc);CHKERRQ(ierr);
505f659e5c7SJunchao Zhang       if (loc >= 0) { /* Found in selected this root is connected */
506f659e5c7SJunchao Zhang         bas->irootloc[q++] = irootloc[j];
507f659e5c7SJunchao Zhang         connected = PETSC_TRUE;
508f659e5c7SJunchao Zhang       }
509f659e5c7SJunchao Zhang     }
510f659e5c7SJunchao Zhang     if (connected) {
511f659e5c7SJunchao Zhang       bas->niranks++;
512f659e5c7SJunchao Zhang       if (i<ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */
513f659e5c7SJunchao Zhang       bas->iranks[p]    = iranks[i];
514f659e5c7SJunchao Zhang       bas->ioffset[p+1] = q;
515f659e5c7SJunchao Zhang       p++;
516f659e5c7SJunchao Zhang     }
517f659e5c7SJunchao Zhang   }
518f659e5c7SJunchao Zhang   bas->itotal = q;
519f659e5c7SJunchao Zhang 
520f659e5c7SJunchao Zhang   /* Setup packing optimizations */
521*b23bfdefSJunchao Zhang   ierr = PetscSFPackSetupOptimization_Basic(esf);CHKERRQ(ierr);
522f659e5c7SJunchao Zhang   esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
523f659e5c7SJunchao Zhang 
524f659e5c7SJunchao Zhang   ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
525f659e5c7SJunchao Zhang   *newsf = esf;
526f659e5c7SJunchao Zhang   PetscFunctionReturn(0);
527f659e5c7SJunchao Zhang }
528f659e5c7SJunchao Zhang 
529f659e5c7SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedLeafSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
530f659e5c7SJunchao Zhang {
531f659e5c7SJunchao Zhang   PetscSF           esf;
532f659e5c7SJunchao Zhang   PetscInt          i,j,k,p,q,nroots,*rootdata,*leafdata,*new_ilocal,niranks,ndiranks,minleaf,maxleaf,maxlocal;
533*b23bfdefSJunchao Zhang   const PetscInt    *ilocal,*ioffset,*irootloc,*buffer;
534f659e5c7SJunchao Zhang   const PetscMPIInt *iranks;
535f659e5c7SJunchao Zhang   PetscSFPack_Basic link;
536f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
537f659e5c7SJunchao Zhang   const PetscSFNode *iremote;
538f659e5c7SJunchao Zhang   PetscSF_Basic     *bas;
539f659e5c7SJunchao Zhang   MPI_Group         group;
540f659e5c7SJunchao Zhang   PetscErrorCode    ierr;
541f659e5c7SJunchao Zhang 
542f659e5c7SJunchao Zhang   PetscFunctionBegin;
543f659e5c7SJunchao Zhang   ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr);
544f659e5c7SJunchao Zhang   ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */
545f659e5c7SJunchao Zhang 
546f659e5c7SJunchao Zhang   /* Set the graph of esf, which is easy for CreateEmbeddedLeafSF */
547f659e5c7SJunchao Zhang   ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
548f659e5c7SJunchao Zhang   ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
549f659e5c7SJunchao Zhang   ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
550f659e5c7SJunchao Zhang   ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
551f659e5c7SJunchao Zhang   for (i=0; i<nselected; i++) {
552f659e5c7SJunchao Zhang     const PetscInt l     = selected[i];
553f659e5c7SJunchao Zhang     new_ilocal[i]        = ilocal ? ilocal[l] : l;
554f659e5c7SJunchao Zhang     new_iremote[i].rank  = iremote[l].rank;
555f659e5c7SJunchao Zhang     new_iremote[i].index = iremote[l].index;
556f659e5c7SJunchao Zhang   }
557f659e5c7SJunchao Zhang 
558f659e5c7SJunchao Zhang   /* Tag selected leaves before PetscSFSetGraph since new_ilocal might turn into NULL since we use PETSC_OWN_POINTER below */
559f659e5c7SJunchao Zhang   maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */
560f659e5c7SJunchao Zhang   ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);CHKERRQ(ierr);
561f659e5c7SJunchao Zhang   for (i=0; i<nselected; i++) leafdata[new_ilocal[i]-minleaf] = 1; /* -minleaf to adjust indices according to minleaf */
562f659e5c7SJunchao Zhang 
563f659e5c7SJunchao Zhang   ierr = PetscSFSetGraph(esf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
564f659e5c7SJunchao Zhang 
565f659e5c7SJunchao Zhang   /* Set up the outgoing communication (i.e., send info). We can not reuse rmine etc in sf since there is no way to
566f659e5c7SJunchao Zhang      map rmine[i] (ilocal of leaves) back to selected[j]  (leaf indices).
567f659e5c7SJunchao Zhang    */
568f659e5c7SJunchao Zhang   ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr);
569f659e5c7SJunchao Zhang   ierr = PetscSFSetUpRanks(esf,group);CHKERRQ(ierr);
570f659e5c7SJunchao Zhang   ierr = MPI_Group_free(&group);CHKERRQ(ierr);
571f659e5c7SJunchao Zhang 
572f659e5c7SJunchao Zhang   /* Set up the incoming communication (i.e., recv info) */
573f659e5c7SJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr);
574f659e5c7SJunchao Zhang   bas  = (PetscSF_Basic*)esf->data;
575f659e5c7SJunchao Zhang   ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr);
576f659e5c7SJunchao Zhang   ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr);
577f659e5c7SJunchao Zhang 
578f659e5c7SJunchao Zhang   /* Pass info about selected leaves to root buffer */
579f659e5c7SJunchao Zhang   ierr = PetscSFReduceBegin_Basic(sf,MPIU_INT,leafdata-minleaf,rootdata,MPIU_REPLACE);CHKERRQ(ierr); /* -minleaf to re-adjust start address of leafdata */
5809d1c8addSJunchao Zhang   ierr = PetscSFPackGetInUse(sf,MPIU_INT,rootdata,leafdata-minleaf,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
581f659e5c7SJunchao Zhang   ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);
582f659e5c7SJunchao Zhang 
583f659e5c7SJunchao Zhang   bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
584f659e5c7SJunchao Zhang   p = 0; /* Counter for connected leaf ranks */
585f659e5c7SJunchao Zhang   q = 0; /* Counter for connected roots */
586f659e5c7SJunchao Zhang   for (i=0; i<niranks; i++) {
587*b23bfdefSJunchao Zhang     buffer    = i < ndiranks? (PetscInt*)link->selfbuf : (PetscInt*)link->rootbuf + (ioffset[i] - ioffset[ndiranks]);
588f659e5c7SJunchao Zhang     PetscBool connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
589f659e5c7SJunchao Zhang     for (j=ioffset[i],k=0; j<ioffset[i+1]; j++,k++) {
590*b23bfdefSJunchao Zhang       if (buffer[k]) {bas->irootloc[q++] = irootloc[j]; connected = PETSC_TRUE;}
591f659e5c7SJunchao Zhang     }
592f659e5c7SJunchao Zhang     if (connected) {
593f659e5c7SJunchao Zhang       bas->niranks++;
594f659e5c7SJunchao Zhang       if (i<ndiranks) bas->ndiranks++;
595f659e5c7SJunchao Zhang       bas->iranks[p]    = iranks[i];
596f659e5c7SJunchao Zhang       bas->ioffset[p+1] = q;
597f659e5c7SJunchao Zhang       p++;
598f659e5c7SJunchao Zhang     }
599f659e5c7SJunchao Zhang   }
600f659e5c7SJunchao Zhang   bas->itotal = q;
601f659e5c7SJunchao Zhang   ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
602f659e5c7SJunchao Zhang 
603f659e5c7SJunchao Zhang   /* Setup packing optimizations */
604*b23bfdefSJunchao Zhang   ierr = PetscSFPackSetupOptimization_Basic(esf);CHKERRQ(ierr);
605f659e5c7SJunchao Zhang   esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
606f659e5c7SJunchao Zhang 
607f659e5c7SJunchao Zhang   ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
608f659e5c7SJunchao Zhang   *newsf = esf;
609f659e5c7SJunchao Zhang   PetscFunctionReturn(0);
610f659e5c7SJunchao Zhang }
611f659e5c7SJunchao Zhang 
6128cc058d9SJed Brown PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
61395fce210SBarry Smith {
61440e23c03SJunchao Zhang   PetscSF_Basic  *dat;
61595fce210SBarry Smith   PetscErrorCode ierr;
61695fce210SBarry Smith 
61795fce210SBarry Smith   PetscFunctionBegin;
61895fce210SBarry Smith   sf->ops->SetUp                = PetscSFSetUp_Basic;
61995fce210SBarry Smith   sf->ops->SetFromOptions       = PetscSFSetFromOptions_Basic;
62095fce210SBarry Smith   sf->ops->Reset                = PetscSFReset_Basic;
62195fce210SBarry Smith   sf->ops->Destroy              = PetscSFDestroy_Basic;
62295fce210SBarry Smith   sf->ops->View                 = PetscSFView_Basic;
6233482bfa8SJunchao Zhang   sf->ops->BcastAndOpBegin      = PetscSFBcastAndOpBegin_Basic;
6243482bfa8SJunchao Zhang   sf->ops->BcastAndOpEnd        = PetscSFBcastAndOpEnd_Basic;
62595fce210SBarry Smith   sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
62695fce210SBarry Smith   sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
62795fce210SBarry Smith   sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
62895fce210SBarry Smith   sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
6298750ddebSJunchao Zhang   sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
630f659e5c7SJunchao Zhang   sf->ops->CreateEmbeddedSF     = PetscSFCreateEmbeddedSF_Basic;
631f659e5c7SJunchao Zhang   sf->ops->CreateEmbeddedLeafSF = PetscSFCreateEmbeddedLeafSF_Basic;
63295fce210SBarry Smith 
63340e23c03SJunchao Zhang   ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr);
63440e23c03SJunchao Zhang   sf->data = (void*)dat;
63595fce210SBarry Smith   PetscFunctionReturn(0);
63695fce210SBarry Smith }
637