xref: /petsc/src/vec/is/sf/impls/basic/sfbasic.c (revision f6d956f6ebb1636834a51ed235197d89c0d838f4)
18cd53115SBarry Smith 
240e23c03SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfbasic.h>
395fce210SBarry Smith 
440e23c03SJunchao Zhang /*===================================================================================*/
5eb02082bSJunchao Zhang /*              Internal routines for PetscSFPack                              */
640e23c03SJunchao Zhang /*===================================================================================*/
795fce210SBarry Smith 
840e23c03SJunchao Zhang /* Return root and leaf MPI requests for communication in the given direction. If the requests have not been
940e23c03SJunchao Zhang    initialized (since we use persistent requests), then initialize them.
1095fce210SBarry Smith */
11eb02082bSJunchao Zhang static PetscErrorCode PetscSFPackGetReqs_Basic(PetscSF sf,PetscSFPack link,PetscSFDirection direction,MPI_Request **rootreqs,MPI_Request **leafreqs)
1240e23c03SJunchao Zhang {
13b23bfdefSJunchao Zhang   PetscErrorCode ierr;
1440e23c03SJunchao Zhang   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
1506f2e050SLisandro Dalcin   PetscInt       i,j,nrootranks,ndrootranks,nleafranks,ndleafranks;
1640e23c03SJunchao Zhang   const PetscInt *rootoffset,*leafoffset;
1740e23c03SJunchao Zhang   PetscMPIInt    n;
1840e23c03SJunchao Zhang   MPI_Comm       comm = PetscObjectComm((PetscObject)sf);
19eb02082bSJunchao Zhang   MPI_Datatype   unit = link->unit;
20120a1823SJunchao Zhang   PetscMemType   rootmtype,leafmtype;
2195fce210SBarry Smith 
2240e23c03SJunchao Zhang   PetscFunctionBegin;
23120a1823SJunchao Zhang   if (use_gpu_aware_mpi) {
24120a1823SJunchao Zhang     rootmtype = link->rootmtype;
25120a1823SJunchao Zhang     leafmtype = link->leafmtype;
26120a1823SJunchao Zhang   } else {
27120a1823SJunchao Zhang     rootmtype = PETSC_MEMTYPE_HOST;
28120a1823SJunchao Zhang     leafmtype = PETSC_MEMTYPE_HOST;
29120a1823SJunchao Zhang   }
30120a1823SJunchao Zhang 
31eb02082bSJunchao Zhang   if (rootreqs && !link->rootreqsinited[direction][rootmtype]) {
3240e23c03SJunchao Zhang     ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
3340e23c03SJunchao Zhang     if (direction == PETSCSF_LEAF2ROOT_REDUCE) {
34eb02082bSJunchao Zhang       for (i=ndrootranks,j=0; i<nrootranks; i++,j++) {
3506f2e050SLisandro Dalcin         MPI_Aint disp = (rootoffset[i] - rootoffset[ndrootranks])*link->unitbytes;
3640e23c03SJunchao Zhang         ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
37eb02082bSJunchao Zhang         ierr = MPI_Recv_init(link->rootbuf[rootmtype]+disp,n,unit,bas->iranks[i],link->tag,comm,&link->rootreqs[direction][rootmtype][j]);CHKERRQ(ierr);
3840e23c03SJunchao Zhang       }
3940e23c03SJunchao Zhang     } else if (direction == PETSCSF_ROOT2LEAF_BCAST) {
40eb02082bSJunchao Zhang       for (i=ndrootranks,j=0; i<nrootranks; i++,j++) {
4106f2e050SLisandro Dalcin         MPI_Aint disp = (rootoffset[i] - rootoffset[ndrootranks])*link->unitbytes;
4240e23c03SJunchao Zhang         ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
43eb02082bSJunchao Zhang         ierr = MPI_Send_init(link->rootbuf[rootmtype]+disp,n,unit,bas->iranks[i],link->tag,comm,&link->rootreqs[direction][rootmtype][j]);CHKERRQ(ierr);
4440e23c03SJunchao Zhang       }
45eb02082bSJunchao Zhang     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Out-of-range PetscSFDirection = %d\n",(int)direction);
46eb02082bSJunchao Zhang     link->rootreqsinited[direction][rootmtype] = PETSC_TRUE;
47eb02082bSJunchao Zhang   }
48eb02082bSJunchao Zhang 
49eb02082bSJunchao Zhang   if (leafreqs && !link->leafreqsinited[direction][leafmtype]) {
50eb02082bSJunchao Zhang     ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr);
51eb02082bSJunchao Zhang     if (direction == PETSCSF_LEAF2ROOT_REDUCE) {
52eb02082bSJunchao Zhang       for (i=ndleafranks,j=0; i<nleafranks; i++,j++) {
5306f2e050SLisandro Dalcin         MPI_Aint disp = (leafoffset[i] - leafoffset[ndleafranks])*link->unitbytes;
5440e23c03SJunchao Zhang         ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
55eb02082bSJunchao Zhang         ierr = MPI_Send_init(link->leafbuf[leafmtype]+disp,n,unit,sf->ranks[i],link->tag,comm,&link->leafreqs[direction][leafmtype][j]);CHKERRQ(ierr);
5640e23c03SJunchao Zhang       }
57eb02082bSJunchao Zhang     } else if (direction == PETSCSF_ROOT2LEAF_BCAST) {
58eb02082bSJunchao Zhang       for (i=ndleafranks,j=0; i<nleafranks; i++,j++) {
5906f2e050SLisandro Dalcin         MPI_Aint disp = (leafoffset[i] - leafoffset[ndleafranks])*link->unitbytes;
60eb02082bSJunchao Zhang         ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
61eb02082bSJunchao Zhang         ierr = MPI_Recv_init(link->leafbuf[leafmtype]+disp,n,unit,sf->ranks[i],link->tag,comm,&link->leafreqs[direction][leafmtype][j]);CHKERRQ(ierr);
62eb02082bSJunchao Zhang       }
63eb02082bSJunchao Zhang     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Out-of-range PetscSFDirection = %d\n",(int)direction);
64eb02082bSJunchao Zhang     link->leafreqsinited[direction][leafmtype] = PETSC_TRUE;
6595fce210SBarry Smith   }
6695fce210SBarry Smith 
67eb02082bSJunchao Zhang   if (rootreqs) *rootreqs = link->rootreqs[direction][rootmtype];
68eb02082bSJunchao Zhang   if (leafreqs) *leafreqs = link->leafreqs[direction][leafmtype];
6940e23c03SJunchao Zhang   PetscFunctionReturn(0);
7095fce210SBarry Smith }
7195fce210SBarry Smith 
72b23bfdefSJunchao Zhang /* Common part shared by SFBasic and SFNeighbor based on the fact they all deal with sparse graphs. */
73eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackGet_Basic_Common(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscInt nrootreqs,PetscInt nleafreqs,PetscSFPack *mylink)
7440e23c03SJunchao Zhang {
7540e23c03SJunchao Zhang   PetscErrorCode    ierr;
76b23bfdefSJunchao Zhang   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
77eb02082bSJunchao Zhang   PetscInt          i,j,nreqs,nrootranks,ndrootranks,nleafranks,ndleafranks;
7840e23c03SJunchao Zhang   const PetscInt    *rootoffset,*leafoffset;
79eb02082bSJunchao Zhang   PetscSFPack       *p,link;
80b23bfdefSJunchao Zhang   PetscBool         match;
8140e23c03SJunchao Zhang 
8240e23c03SJunchao Zhang   PetscFunctionBegin;
83b23bfdefSJunchao Zhang   ierr = PetscSFPackSetErrorOnUnsupportedOverlap(sf,unit,rootdata,leafdata);CHKERRQ(ierr);
849d1c8addSJunchao Zhang 
8540e23c03SJunchao Zhang   /* Look for types in cache */
86eb02082bSJunchao Zhang   for (p=&bas->avail; (link=*p); p=&link->next) {
8740e23c03SJunchao Zhang     ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr);
8840e23c03SJunchao Zhang     if (match) {
8940e23c03SJunchao Zhang       *p = link->next; /* Remove from available list */
9040e23c03SJunchao Zhang       goto found;
9140e23c03SJunchao Zhang     }
9253deab39SPeter Brune   }
9353deab39SPeter Brune 
9440e23c03SJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
9540e23c03SJunchao Zhang   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr);
9640e23c03SJunchao Zhang   ierr = PetscNew(&link);CHKERRQ(ierr);
97eb02082bSJunchao Zhang   ierr = PetscSFPackSetUp_Host(sf,link,unit);CHKERRQ(ierr);
98b23bfdefSJunchao Zhang   ierr = PetscCommGetNewTag(PetscObjectComm((PetscObject)sf),&link->tag);CHKERRQ(ierr); /* One tag per link */
9953deab39SPeter Brune 
100eb02082bSJunchao Zhang   /* Allocate root, leaf, self buffers, and MPI requests */
101eb02082bSJunchao Zhang   link->rootbuflen = rootoffset[nrootranks]-rootoffset[ndrootranks];
102eb02082bSJunchao Zhang   link->leafbuflen = leafoffset[nleafranks]-leafoffset[ndleafranks];
103a89cec69SJunchao Zhang   link->selfbuflen = rootoffset[ndrootranks];
104eb02082bSJunchao Zhang   link->nrootreqs  = nrootreqs;
105eb02082bSJunchao Zhang   link->nleafreqs  = nleafreqs;
106eb02082bSJunchao Zhang   nreqs            = (nrootreqs+nleafreqs)*4; /* Quadruple the requests since there are two communication directions and two memory types */
107eb02082bSJunchao Zhang   ierr             = PetscMalloc1(nreqs,&link->reqs);CHKERRQ(ierr);
108eb02082bSJunchao Zhang   for (i=0; i<nreqs; i++) link->reqs[i] = MPI_REQUEST_NULL; /* Initialized to NULL so that we know which need to be freed in Destroy */
109eb02082bSJunchao Zhang 
110eb02082bSJunchao Zhang   for (i=0; i<2; i++) { /* Two communication directions */
111eb02082bSJunchao Zhang     for (j=0; j<2; j++) { /* Two memory types */
112eb02082bSJunchao Zhang       link->rootreqs[i][j] = link->reqs + nrootreqs*(2*i+j);
113eb02082bSJunchao Zhang       link->leafreqs[i][j] = link->reqs + nrootreqs*4 + nleafreqs*(2*i+j);
114eb02082bSJunchao Zhang     }
115eb02082bSJunchao Zhang   }
11653deab39SPeter Brune 
11740e23c03SJunchao Zhang found:
118eb02082bSJunchao Zhang   link->rootmtype = rootmtype;
119eb02082bSJunchao Zhang   link->leafmtype = leafmtype;
120eb02082bSJunchao Zhang #if defined(PETSC_HAVE_CUDA)
121120a1823SJunchao Zhang   ierr = PetscSFPackSetUp_Device(sf,link,unit);CHKERRQ(ierr);
122120a1823SJunchao Zhang   if (!use_gpu_aware_mpi) {
123120a1823SJunchao Zhang     /* If not using GPU aware MPI, we always need buffers on host. In case root/leafdata is on device, we copy root/leafdata to/from
124120a1823SJunchao Zhang        these buffers for MPI. We only need buffers for remote neighbors since self-to-self communication is not done via MPI.
125120a1823SJunchao Zhang      */
12651ccb202SJunchao Zhang     if (!link->rootbuf[PETSC_MEMTYPE_HOST]) {
12751ccb202SJunchao Zhang       if (rootmtype == PETSC_MEMTYPE_DEVICE && sf->use_pinned_buf) {
12851ccb202SJunchao Zhang         ierr = PetscMallocPinnedMemory(link->rootbuflen*link->unitbytes,(void**)&link->rootbuf[PETSC_MEMTYPE_HOST]);CHKERRQ(ierr);
12951ccb202SJunchao Zhang       } else {
13051ccb202SJunchao Zhang         ierr = PetscMallocWithMemType(PETSC_MEMTYPE_HOST,link->rootbuflen*link->unitbytes,(void**)&link->rootbuf[PETSC_MEMTYPE_HOST]);CHKERRQ(ierr);
13151ccb202SJunchao Zhang       }
13251ccb202SJunchao Zhang     }
13351ccb202SJunchao Zhang     if (!link->leafbuf[PETSC_MEMTYPE_HOST]) {
13451ccb202SJunchao Zhang       if (leafmtype == PETSC_MEMTYPE_DEVICE && sf->use_pinned_buf) {
13551ccb202SJunchao Zhang         ierr = PetscMallocPinnedMemory(link->leafbuflen*link->unitbytes,(void**)&link->leafbuf[PETSC_MEMTYPE_HOST]);CHKERRQ(ierr);
13651ccb202SJunchao Zhang       } else {
13751ccb202SJunchao Zhang         ierr = PetscMallocWithMemType(PETSC_MEMTYPE_HOST,link->leafbuflen*link->unitbytes,(void**)&link->leafbuf[PETSC_MEMTYPE_HOST]);CHKERRQ(ierr);
13851ccb202SJunchao Zhang       }
13951ccb202SJunchao Zhang     }
140120a1823SJunchao Zhang   }
141eb02082bSJunchao Zhang #endif
142eb02082bSJunchao Zhang   if (!link->rootbuf[rootmtype]) {ierr = PetscMallocWithMemType(rootmtype,link->rootbuflen*link->unitbytes,(void**)&link->rootbuf[rootmtype]);CHKERRQ(ierr);}
143eb02082bSJunchao Zhang   if (!link->leafbuf[leafmtype]) {ierr = PetscMallocWithMemType(leafmtype,link->leafbuflen*link->unitbytes,(void**)&link->leafbuf[leafmtype]);CHKERRQ(ierr);}
144eb02082bSJunchao Zhang   if (!link->selfbuf[rootmtype]) {ierr = PetscMallocWithMemType(rootmtype,link->selfbuflen*link->unitbytes,(void**)&link->selfbuf[rootmtype]);CHKERRQ(ierr);}
145eb02082bSJunchao Zhang   if (rootmtype != leafmtype && !link->selfbuf[leafmtype]) {ierr = PetscMallocWithMemType(leafmtype,link->selfbuflen*link->unitbytes,(void**)&link->selfbuf[leafmtype]);CHKERRQ(ierr);}
146637e6665SJunchao Zhang   link->rootdata = rootdata;
147637e6665SJunchao Zhang   link->leafdata = leafdata;
14840e23c03SJunchao Zhang   link->next     = bas->inuse;
149eb02082bSJunchao Zhang   bas->inuse     = link;
15040e23c03SJunchao Zhang   *mylink        = link;
15140e23c03SJunchao Zhang   PetscFunctionReturn(0);
15295fce210SBarry Smith }
15395fce210SBarry Smith 
154eb02082bSJunchao Zhang static PetscErrorCode PetscSFPackGet_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscSFDirection direction,PetscSFPack *mylink)
155b23bfdefSJunchao Zhang {
156b23bfdefSJunchao Zhang   PetscErrorCode    ierr;
157eb02082bSJunchao Zhang   PetscInt          nrootranks,ndrootranks,nleafranks,ndleafranks;
158b23bfdefSJunchao Zhang 
159b23bfdefSJunchao Zhang   PetscFunctionBegin;
160b23bfdefSJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,NULL,NULL);CHKERRQ(ierr);
161b23bfdefSJunchao Zhang   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,NULL,NULL,NULL);CHKERRQ(ierr);
162eb02082bSJunchao Zhang   ierr = PetscSFPackGet_Basic_Common(sf,unit,rootmtype,rootdata,leafmtype,leafdata,nrootranks-ndrootranks,nleafranks-ndleafranks,mylink);CHKERRQ(ierr);
163b23bfdefSJunchao Zhang   PetscFunctionReturn(0);
164b23bfdefSJunchao Zhang }
165b23bfdefSJunchao Zhang 
16640e23c03SJunchao Zhang /*===================================================================================*/
16740e23c03SJunchao Zhang /*              SF public interface implementations                                  */
16840e23c03SJunchao Zhang /*===================================================================================*/
16940e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
17095fce210SBarry Smith {
17195fce210SBarry Smith   PetscErrorCode ierr;
172b23bfdefSJunchao Zhang   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
17395fce210SBarry Smith   PetscInt       *rlengths,*ilengths,i;
17440e23c03SJunchao Zhang   PetscMPIInt    rank,niranks,*iranks,tag;
17595fce210SBarry Smith   MPI_Comm       comm;
176b5a8e515SJed Brown   MPI_Group      group;
17740e23c03SJunchao Zhang   MPI_Request    *rootreqs,*leafreqs;
17895fce210SBarry Smith 
17995fce210SBarry Smith   PetscFunctionBegin;
180b5a8e515SJed Brown   ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr);
181b5a8e515SJed Brown   ierr = PetscSFSetUpRanks(sf,group);CHKERRQ(ierr);
182b5a8e515SJed Brown   ierr = MPI_Group_free(&group);CHKERRQ(ierr);
18395fce210SBarry Smith   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
18440e23c03SJunchao Zhang   ierr = PetscObjectGetNewTag((PetscObject)sf,&tag);CHKERRQ(ierr);
185c943f53fSJed Brown   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
18695fce210SBarry Smith   /*
18795fce210SBarry Smith    * Inform roots about how many leaves and from which ranks
18895fce210SBarry Smith    */
189785e854fSJed Brown   ierr = PetscMalloc1(sf->nranks,&rlengths);CHKERRQ(ierr);
19095fce210SBarry Smith   /* Determine number, sending ranks, and length of incoming */
19195fce210SBarry Smith   for (i=0; i<sf->nranks; i++) {
19295fce210SBarry Smith     rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */
19395fce210SBarry Smith   }
19440e23c03SJunchao Zhang   ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);CHKERRQ(ierr);
195c943f53fSJed Brown 
1960b899082SJunchao Zhang   /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why.
1970b899082SJunchao Zhang      We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is
1980b899082SJunchao Zhang      small and the sorting is cheap.
1990b899082SJunchao Zhang    */
2000b899082SJunchao Zhang   ierr = PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);CHKERRQ(ierr);
2010b899082SJunchao Zhang 
202c943f53fSJed Brown   /* Partition into distinguished and non-distinguished incoming ranks */
203c943f53fSJed Brown   bas->ndiranks = sf->ndranks;
204c943f53fSJed Brown   bas->niranks = bas->ndiranks + niranks;
205c943f53fSJed Brown   ierr = PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);CHKERRQ(ierr);
206c943f53fSJed Brown   bas->ioffset[0] = 0;
207c943f53fSJed Brown   for (i=0; i<bas->ndiranks; i++) {
208c943f53fSJed Brown     bas->iranks[i] = sf->ranks[i];
209c943f53fSJed Brown     bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i];
210c943f53fSJed Brown   }
21140e23c03SJunchao Zhang   if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks");
21240e23c03SJunchao Zhang   for ( ; i<bas->niranks; i++) {
213c943f53fSJed Brown     bas->iranks[i] = iranks[i-bas->ndiranks];
214c943f53fSJed Brown     bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks];
215c943f53fSJed Brown   }
216c943f53fSJed Brown   bas->itotal = bas->ioffset[i];
21795fce210SBarry Smith   ierr = PetscFree(rlengths);CHKERRQ(ierr);
218c943f53fSJed Brown   ierr = PetscFree(iranks);CHKERRQ(ierr);
219c943f53fSJed Brown   ierr = PetscFree(ilengths);CHKERRQ(ierr);
22095fce210SBarry Smith 
22195fce210SBarry Smith   /* Send leaf identities to roots */
222c943f53fSJed Brown   ierr = PetscMalloc1(bas->itotal,&bas->irootloc);CHKERRQ(ierr);
22340e23c03SJunchao Zhang   ierr = PetscMalloc2(bas->niranks-bas->ndiranks,&rootreqs,sf->nranks-sf->ndranks,&leafreqs);CHKERRQ(ierr);
22440e23c03SJunchao Zhang   for (i=bas->ndiranks; i<bas->niranks; i++) {
22540e23c03SJunchao Zhang     ierr = MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);CHKERRQ(ierr);
22640e23c03SJunchao Zhang   }
22740e23c03SJunchao Zhang   for (i=0; i<sf->nranks; i++) {
22895fce210SBarry Smith     PetscMPIInt npoints;
22995fce210SBarry Smith     ierr = PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);CHKERRQ(ierr);
23040e23c03SJunchao Zhang     if (i < sf->ndranks) {
23140e23c03SJunchao Zhang       if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank");
23240e23c03SJunchao Zhang       if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank");
23340e23c03SJunchao Zhang       if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths");
23440e23c03SJunchao Zhang       ierr = PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);CHKERRQ(ierr);
235c943f53fSJed Brown       continue;
236c943f53fSJed Brown     }
23740e23c03SJunchao Zhang     ierr = MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);CHKERRQ(ierr);
238bf39f1bfSJed Brown   }
23940e23c03SJunchao Zhang   ierr = MPI_Waitall(bas->niranks-bas->ndiranks,rootreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
24040e23c03SJunchao Zhang   ierr = MPI_Waitall(sf->nranks-sf->ndranks,leafreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
24140e23c03SJunchao Zhang   ierr = PetscFree2(rootreqs,leafreqs);CHKERRQ(ierr);
24295fce210SBarry Smith 
243eb02082bSJunchao Zhang   sf->selfleafdups    = PETSC_TRUE; /* The conservative assumption is there are data race */
244eb02082bSJunchao Zhang   sf->remoteleafdups  = PETSC_TRUE;
245eb02082bSJunchao Zhang   bas->selfrootdups   = PETSC_TRUE;
246eb02082bSJunchao Zhang   bas->remoterootdups = PETSC_TRUE;
247eb02082bSJunchao Zhang 
248b23bfdefSJunchao Zhang   /* Setup packing optimization for roots and leaves */
249eb02082bSJunchao Zhang   ierr = PetscSFPackSetupOptimizations_Basic(sf);CHKERRQ(ierr);
25095fce210SBarry Smith   PetscFunctionReturn(0);
25195fce210SBarry Smith }
25295fce210SBarry Smith 
25351ccb202SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFSetFromOptions_Basic(PetscOptionItems *PetscOptionsObject,PetscSF sf)
25495fce210SBarry Smith {
25595fce210SBarry Smith   PetscErrorCode ierr;
25695fce210SBarry Smith 
25795fce210SBarry Smith   PetscFunctionBegin;
258e55864a3SBarry Smith   ierr = PetscOptionsHead(PetscOptionsObject,"PetscSF Basic options");CHKERRQ(ierr);
25951ccb202SJunchao Zhang   ierr = PetscOptionsBool("-sf_use_pinned_buffer","Use pinned (nonpagable) memory for send/recv buffers on host","PetscSFSetFromOptions",sf->use_pinned_buf,&sf->use_pinned_buf,NULL);CHKERRQ(ierr);
26095fce210SBarry Smith   ierr = PetscOptionsTail();CHKERRQ(ierr);
26195fce210SBarry Smith   PetscFunctionReturn(0);
26295fce210SBarry Smith }
26395fce210SBarry Smith 
26440e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
26595fce210SBarry Smith {
26695fce210SBarry Smith   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
26795fce210SBarry Smith   PetscErrorCode    ierr;
26895fce210SBarry Smith 
26995fce210SBarry Smith   PetscFunctionBegin;
27029046d53SLisandro Dalcin   if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
271c943f53fSJed Brown   ierr = PetscFree2(bas->iranks,bas->ioffset);CHKERRQ(ierr);
272c943f53fSJed Brown   ierr = PetscFree(bas->irootloc);CHKERRQ(ierr);
273eb02082bSJunchao Zhang #if defined(PETSC_HAVE_CUDA)
274eb02082bSJunchao Zhang   if (bas->irootloc_d) {cudaError_t err = cudaFree(bas->irootloc_d);CHKERRCUDA(err);bas->irootloc_d=NULL;}
275eb02082bSJunchao Zhang #endif
27651ccb202SJunchao Zhang   ierr = PetscSFPackDestroyAvailable(sf,&bas->avail);CHKERRQ(ierr);
277eb02082bSJunchao Zhang   ierr = PetscSFPackDestroyOptimizations_Basic(sf);CHKERRQ(ierr);
27895fce210SBarry Smith   PetscFunctionReturn(0);
27995fce210SBarry Smith }
28095fce210SBarry Smith 
28140e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
28295fce210SBarry Smith {
28395fce210SBarry Smith   PetscErrorCode ierr;
28495fce210SBarry Smith 
28595fce210SBarry Smith   PetscFunctionBegin;
286*f6d956f6SStefano Zampini   ierr = PetscSFReset_Basic(sf);CHKERRQ(ierr);
28795fce210SBarry Smith   ierr = PetscFree(sf->data);CHKERRQ(ierr);
28895fce210SBarry Smith   PetscFunctionReturn(0);
28995fce210SBarry Smith }
29095fce210SBarry Smith 
29140e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer)
29295fce210SBarry Smith {
29395fce210SBarry Smith   PetscErrorCode ierr;
29495fce210SBarry Smith   PetscBool      iascii;
29595fce210SBarry Smith 
29695fce210SBarry Smith   PetscFunctionBegin;
29795fce210SBarry Smith   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
298b23bfdefSJunchao Zhang   if (iascii) {ierr = PetscViewerASCIIPrintf(viewer,"  sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);}
29995fce210SBarry Smith   PetscFunctionReturn(0);
30095fce210SBarry Smith }
30195fce210SBarry Smith 
302eb02082bSJunchao Zhang static PetscErrorCode PetscSFBcastAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
30395fce210SBarry Smith {
30495fce210SBarry Smith   PetscErrorCode    ierr;
305eb02082bSJunchao Zhang   PetscSFPack       link;
306eb02082bSJunchao Zhang   const PetscInt    *rootloc = NULL;
30795fce210SBarry Smith   MPI_Request       *rootreqs,*leafreqs;
30895fce210SBarry Smith 
30995fce210SBarry Smith   PetscFunctionBegin;
310eb02082bSJunchao Zhang   ierr = PetscSFPackGet_Basic(sf,unit,rootmtype,rootdata,leafmtype,leafdata,PETSCSF_ROOT2LEAF_BCAST,&link);CHKERRQ(ierr);
311eb02082bSJunchao Zhang   ierr = PetscSFGetRootIndicesWithMemType_Basic(sf,rootmtype,&rootloc);CHKERRQ(ierr);
31295fce210SBarry Smith 
313b23bfdefSJunchao Zhang   ierr = PetscSFPackGetReqs_Basic(sf,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr);
314b23bfdefSJunchao Zhang   /* Post Irecv. Note distinguished ranks receive data via shared memory (i.e., not via MPI) */
315eb02082bSJunchao Zhang   ierr = MPI_Startall_irecv(link->leafbuflen,unit,link->nleafreqs,leafreqs);CHKERRQ(ierr);
31630e38525SJunchao Zhang 
317b23bfdefSJunchao Zhang   /* Do Isend */
318eb02082bSJunchao Zhang   ierr = PetscSFPackRootData(sf,link,rootloc,rootdata,PETSC_TRUE);CHKERRQ(ierr);
319eb02082bSJunchao Zhang   ierr = MPI_Startall_isend(link->rootbuflen,unit,link->nrootreqs,rootreqs);CHKERRQ(ierr);
320eb02082bSJunchao Zhang 
321eb02082bSJunchao Zhang   /* Do self to self communication via memcpy only when rootdata and leafdata are in different memory */
322eb02082bSJunchao Zhang   if (rootmtype != leafmtype) {ierr = PetscMemcpyWithMemType(leafmtype,rootmtype,link->selfbuf[leafmtype],link->selfbuf[rootmtype],link->selfbuflen*link->unitbytes);CHKERRQ(ierr);}
32395fce210SBarry Smith   PetscFunctionReturn(0);
32495fce210SBarry Smith }
32595fce210SBarry Smith 
326eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
32795fce210SBarry Smith {
32895fce210SBarry Smith   PetscErrorCode    ierr;
329eb02082bSJunchao Zhang   PetscSFPack       link;
330eb02082bSJunchao Zhang   const PetscInt    *leafloc = NULL;
33195fce210SBarry Smith 
33295fce210SBarry Smith   PetscFunctionBegin;
333eb02082bSJunchao Zhang   ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
334b7c0d12aSJunchao Zhang   ierr = PetscSFPackWaitall(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
335eb02082bSJunchao Zhang   ierr = PetscSFGetLeafIndicesWithMemType_Basic(sf,leafmtype,&leafloc);CHKERRQ(ierr);
336eb02082bSJunchao Zhang   ierr = PetscSFUnpackAndOpLeafData(sf,link,leafloc,leafdata,op,PETSC_TRUE);CHKERRQ(ierr);
337eb02082bSJunchao Zhang   ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr);
33895fce210SBarry Smith   PetscFunctionReturn(0);
33995fce210SBarry Smith }
34095fce210SBarry Smith 
34195fce210SBarry Smith /* leaf -> root with reduction */
342eb02082bSJunchao Zhang static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
34395fce210SBarry Smith {
34495fce210SBarry Smith   PetscErrorCode    ierr;
345eb02082bSJunchao Zhang   PetscSFPack       link;
346eb02082bSJunchao Zhang   const PetscInt    *leafloc = NULL;
347277f51e8SBarry Smith   MPI_Request       *rootreqs = NULL,*leafreqs = NULL; /* dummy values for compiler warnings about uninitialized value */
34895fce210SBarry Smith 
34995fce210SBarry Smith   PetscFunctionBegin;
350eb02082bSJunchao Zhang   ierr = PetscSFGetLeafIndicesWithMemType_Basic(sf,leafmtype,&leafloc);
35195fce210SBarry Smith 
352eb02082bSJunchao Zhang   ierr = PetscSFPackGet_Basic(sf,unit,rootmtype,rootdata,leafmtype,leafdata,PETSCSF_LEAF2ROOT_REDUCE,&link);CHKERRQ(ierr);
353b23bfdefSJunchao Zhang   ierr = PetscSFPackGetReqs_Basic(sf,link,PETSCSF_LEAF2ROOT_REDUCE,&rootreqs,&leafreqs);CHKERRQ(ierr);
354c943f53fSJed Brown   /* Eagerly post root receives for non-distinguished ranks */
355eb02082bSJunchao Zhang   ierr = MPI_Startall_irecv(link->rootbuflen,unit,link->nrootreqs,rootreqs);CHKERRQ(ierr);
35630e38525SJunchao Zhang 
35795fce210SBarry Smith   /* Pack and send leaf data */
358eb02082bSJunchao Zhang   ierr = PetscSFPackLeafData(sf,link,leafloc,leafdata,PETSC_TRUE);CHKERRQ(ierr);
359eb02082bSJunchao Zhang   ierr = MPI_Startall_isend(link->leafbuflen,unit,link->nleafreqs,leafreqs);CHKERRQ(ierr);
360eb02082bSJunchao Zhang 
361eb02082bSJunchao Zhang   if (rootmtype != leafmtype) {ierr = PetscMemcpyWithMemType(rootmtype,leafmtype,link->selfbuf[rootmtype],link->selfbuf[leafmtype],link->selfbuflen*link->unitbytes);CHKERRQ(ierr);}
36295fce210SBarry Smith   PetscFunctionReturn(0);
36395fce210SBarry Smith }
36495fce210SBarry Smith 
365eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
36695fce210SBarry Smith {
36795fce210SBarry Smith   PetscErrorCode    ierr;
368eb02082bSJunchao Zhang   PetscSFPack       link;
369eb02082bSJunchao Zhang   const PetscInt    *rootloc = NULL;
37095fce210SBarry Smith 
37195fce210SBarry Smith   PetscFunctionBegin;
372eb02082bSJunchao Zhang   ierr = PetscSFGetRootIndicesWithMemType_Basic(sf,rootmtype,&rootloc);CHKERRQ(ierr);
373eb02082bSJunchao Zhang   ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
374b7c0d12aSJunchao Zhang   ierr = PetscSFPackWaitall(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);
375eb02082bSJunchao Zhang   ierr = PetscSFUnpackAndOpRootData(sf,link,rootloc,rootdata,op,PETSC_TRUE);CHKERRQ(ierr);
376eb02082bSJunchao Zhang   ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr);
37795fce210SBarry Smith   PetscFunctionReturn(0);
37895fce210SBarry Smith }
37995fce210SBarry Smith 
380eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
38195fce210SBarry Smith {
38295fce210SBarry Smith   PetscErrorCode ierr;
38395fce210SBarry Smith 
38495fce210SBarry Smith   PetscFunctionBegin;
38540e23c03SJunchao Zhang   ierr = PetscSFReduceBegin(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
38695fce210SBarry Smith   PetscFunctionReturn(0);
38795fce210SBarry Smith }
38895fce210SBarry Smith 
389eb02082bSJunchao Zhang static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
39095fce210SBarry Smith {
39195fce210SBarry Smith   PetscErrorCode    ierr;
392eb02082bSJunchao Zhang   PetscSFPack       link;
393eb02082bSJunchao Zhang   const PetscInt    *rootloc = NULL,*leafloc = NULL;
39495fce210SBarry Smith   MPI_Request       *rootreqs,*leafreqs;
39595fce210SBarry Smith 
39695fce210SBarry Smith   PetscFunctionBegin;
397eb02082bSJunchao Zhang   ierr = PetscSFGetRootIndicesWithMemType_Basic(sf,rootmtype,&rootloc);CHKERRQ(ierr);
398eb02082bSJunchao Zhang   ierr = PetscSFGetLeafIndicesWithMemType_Basic(sf,leafmtype,&leafloc);CHKERRQ(ierr);
399eb02082bSJunchao Zhang   ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
40095fce210SBarry Smith   /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
401b7c0d12aSJunchao Zhang   ierr = PetscSFPackWaitall(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);
402b23bfdefSJunchao Zhang   ierr = PetscSFPackGetReqs_Basic(sf,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr);
40340e23c03SJunchao Zhang 
40495fce210SBarry Smith   /* Post leaf receives */
405eb02082bSJunchao Zhang   ierr = MPI_Startall_irecv(link->leafbuflen,unit,link->nleafreqs,leafreqs);CHKERRQ(ierr);
40630e38525SJunchao Zhang 
40795fce210SBarry Smith   /* Process local fetch-and-op, post root sends */
408eb02082bSJunchao Zhang   ierr = PetscSFFetchAndOpRootData(sf,link,rootloc,rootdata,op,PETSC_TRUE);CHKERRQ(ierr);
409eb02082bSJunchao Zhang   ierr = MPI_Startall_isend(link->rootbuflen,unit,link->nrootreqs,rootreqs);CHKERRQ(ierr);
410eb02082bSJunchao Zhang   if (rootmtype != leafmtype) {ierr = PetscMemcpyWithMemType(leafmtype,rootmtype,link->selfbuf[leafmtype],link->selfbuf[rootmtype],link->selfbuflen*link->unitbytes);CHKERRQ(ierr);}
411b23bfdefSJunchao Zhang 
412b23bfdefSJunchao Zhang   /* Unpack and insert fetched data into leaves */
413b7c0d12aSJunchao Zhang   ierr = PetscSFPackWaitall(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
414eb02082bSJunchao Zhang   ierr = PetscSFUnpackAndOpLeafData(sf,link,leafloc,leafupdate,MPIU_REPLACE,PETSC_TRUE);CHKERRQ(ierr);
415eb02082bSJunchao Zhang   ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr);
41695fce210SBarry Smith   PetscFunctionReturn(0);
41795fce210SBarry Smith }
41895fce210SBarry Smith 
41940e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
4208750ddebSJunchao Zhang {
4218750ddebSJunchao Zhang   PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
4228750ddebSJunchao Zhang 
4238750ddebSJunchao Zhang   PetscFunctionBegin;
4248750ddebSJunchao Zhang   if (niranks)  *niranks  = bas->niranks;
4258750ddebSJunchao Zhang   if (iranks)   *iranks   = bas->iranks;
4268750ddebSJunchao Zhang   if (ioffset)  *ioffset  = bas->ioffset;
4278750ddebSJunchao Zhang   if (irootloc) *irootloc = bas->irootloc;
4288750ddebSJunchao Zhang   PetscFunctionReturn(0);
4298750ddebSJunchao Zhang }
4308750ddebSJunchao Zhang 
431f659e5c7SJunchao Zhang /* An optimized PetscSFCreateEmbeddedSF. We aggresively make use of the established communication on sf.
432f659e5c7SJunchao Zhang    We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[]
433f659e5c7SJunchao Zhang    was sorted before calling the routine.
434f659e5c7SJunchao Zhang  */
435f659e5c7SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
436f659e5c7SJunchao Zhang {
437f659e5c7SJunchao Zhang   PetscSF           esf;
438b23bfdefSJunchao Zhang   PetscInt          esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote,count;
439b23bfdefSJunchao Zhang   PetscInt          i,j,k,p,q,nroots,*rootdata,*leafdata,connected_leaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal;
440f659e5c7SJunchao Zhang   PetscMPIInt       *esf_ranks;
441f659e5c7SJunchao Zhang   const PetscMPIInt *ranks,*iranks;
442b23bfdefSJunchao Zhang   const PetscInt    *roffset,*rmine,*rremote,*ioffset,*irootloc,*buffer;
443f659e5c7SJunchao Zhang   PetscBool         connected;
444eb02082bSJunchao Zhang   PetscSFPack       link;
445f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
446f659e5c7SJunchao Zhang   PetscSF_Basic     *bas;
447f659e5c7SJunchao Zhang   PetscErrorCode    ierr;
448f659e5c7SJunchao Zhang 
449f659e5c7SJunchao Zhang   PetscFunctionBegin;
450f659e5c7SJunchao Zhang   ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr);
451f659e5c7SJunchao Zhang   ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */
452f659e5c7SJunchao Zhang 
453f659e5c7SJunchao Zhang   /* Find out which leaves are still connected to roots in the embedded sf */
454f659e5c7SJunchao Zhang   ierr = PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);CHKERRQ(ierr);
455f659e5c7SJunchao Zhang   ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
456f659e5c7SJunchao Zhang   /* We abused the term leafdata here, whose size is usually the number of leaf data items. Here its size is # of leaves (always >= # of leaf data items) */
457f659e5c7SJunchao Zhang   maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */
458f659e5c7SJunchao Zhang   ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);CHKERRQ(ierr);
459f659e5c7SJunchao Zhang   /* Tag selected roots */
460f659e5c7SJunchao Zhang   for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1;
461f659e5c7SJunchao Zhang 
462f659e5c7SJunchao Zhang   /* Bcast from roots to leaves to tag connected leaves. We reuse the established bcast communication in
463f659e5c7SJunchao Zhang      sf but do not do unpacking (from leaf buffer to leafdata). The raw data in leaf buffer is what we are
464f659e5c7SJunchao Zhang      interested in since it tells which leaves are connected to which ranks.
465f659e5c7SJunchao Zhang    */
466eb02082bSJunchao Zhang   ierr = PetscSFBcastAndOpBegin_Basic(sf,MPIU_INT,PETSC_MEMTYPE_HOST,rootdata,PETSC_MEMTYPE_HOST,leafdata-minleaf,MPIU_REPLACE);CHKERRQ(ierr); /* Need to give leafdata but we won't use it */
467eb02082bSJunchao Zhang   ierr = PetscSFPackGetInUse(sf,MPIU_INT,rootdata,leafdata-minleaf,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
468b7c0d12aSJunchao Zhang   ierr = PetscSFPackWaitall(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
469f659e5c7SJunchao Zhang   ierr = PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr); /* Get send info */
470f659e5c7SJunchao Zhang   esf_nranks = esf_ndranks = connected_leaves = 0;
471b23bfdefSJunchao Zhang   for (i=0; i<nranks; i++) {
472f659e5c7SJunchao Zhang     connected = PETSC_FALSE; /* Is the current process still connected to this remote root rank? */
473eb02082bSJunchao Zhang     buffer    = i < ndranks? (PetscInt*)link->selfbuf[PETSC_MEMTYPE_HOST] : (PetscInt*)link->leafbuf[PETSC_MEMTYPE_HOST] + (roffset[i] - roffset[ndranks]);
474b23bfdefSJunchao Zhang     count     = roffset[i+1] - roffset[i];
475b23bfdefSJunchao Zhang     for (j=0; j<count; j++) {if (buffer[j]) {connected_leaves++; connected = PETSC_TRUE;}}
476f659e5c7SJunchao Zhang     if (connected) {esf_nranks++; if (i < ndranks) esf_ndranks++;}
477f659e5c7SJunchao Zhang   }
478f659e5c7SJunchao Zhang 
479f659e5c7SJunchao Zhang   /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
480f659e5c7SJunchao Zhang   ierr = PetscMalloc1(connected_leaves,&new_ilocal);CHKERRQ(ierr);
481f659e5c7SJunchao Zhang   ierr = PetscMalloc1(connected_leaves,&new_iremote);CHKERRQ(ierr);
482f659e5c7SJunchao Zhang   ierr = PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,connected_leaves,&esf_rmine,connected_leaves,&esf_rremote);CHKERRQ(ierr);
483f659e5c7SJunchao Zhang   p    = 0; /* Counter for connected root ranks */
484f659e5c7SJunchao Zhang   q    = 0; /* Counter for connected leaves */
485f659e5c7SJunchao Zhang   esf_roffset[0] = 0;
486f659e5c7SJunchao Zhang   for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */
487eb02082bSJunchao Zhang     buffer    = i < ndranks? (PetscInt*)link->selfbuf[PETSC_MEMTYPE_HOST] : (PetscInt*)link->leafbuf[PETSC_MEMTYPE_HOST] + (roffset[i] - roffset[ndranks]);
488f659e5c7SJunchao Zhang     connected = PETSC_FALSE;
489f659e5c7SJunchao Zhang     for (j=roffset[i],k=0; j<roffset[i+1]; j++,k++) {
490b23bfdefSJunchao Zhang       if (buffer[k]) {
491f659e5c7SJunchao Zhang         esf_rmine[q]         = new_ilocal[q] = rmine[j];
492f659e5c7SJunchao Zhang         esf_rremote[q]       = rremote[j];
493f659e5c7SJunchao Zhang         new_iremote[q].index = rremote[j];
494f659e5c7SJunchao Zhang         new_iremote[q].rank  = ranks[i];
495f659e5c7SJunchao Zhang         connected            = PETSC_TRUE;
496f659e5c7SJunchao Zhang         q++;
497f659e5c7SJunchao Zhang       }
498f659e5c7SJunchao Zhang     }
499f659e5c7SJunchao Zhang     if (connected) {
500f659e5c7SJunchao Zhang       esf_ranks[p]     = ranks[i];
501f659e5c7SJunchao Zhang       esf_roffset[p+1] = q;
502f659e5c7SJunchao Zhang       p++;
503f659e5c7SJunchao Zhang     }
504f659e5c7SJunchao Zhang   }
505f659e5c7SJunchao Zhang 
506eb02082bSJunchao Zhang   ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr);
507f659e5c7SJunchao Zhang 
508f659e5c7SJunchao Zhang   /* SetGraph internally resets the SF, so we only set its fields after the call */
509f659e5c7SJunchao Zhang   ierr         = PetscSFSetGraph(esf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
510f659e5c7SJunchao Zhang   esf->nranks  = esf_nranks;
511f659e5c7SJunchao Zhang   esf->ndranks = esf_ndranks;
512f659e5c7SJunchao Zhang   esf->ranks   = esf_ranks;
513f659e5c7SJunchao Zhang   esf->roffset = esf_roffset;
514f659e5c7SJunchao Zhang   esf->rmine   = esf_rmine;
515f659e5c7SJunchao Zhang   esf->rremote = esf_rremote;
516f659e5c7SJunchao Zhang 
517f659e5c7SJunchao Zhang   /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
518f659e5c7SJunchao Zhang   bas  = (PetscSF_Basic*)esf->data;
519f659e5c7SJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr); /* Get recv info */
520f659e5c7SJunchao Zhang   /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we
521f659e5c7SJunchao Zhang      expect these arrays are usually short, so we do not care. The benefit is we can fill these arrays by just parsing irootloc once.
522f659e5c7SJunchao Zhang    */
523f659e5c7SJunchao Zhang   ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr);
524f659e5c7SJunchao Zhang   ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr);
525f659e5c7SJunchao Zhang   bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
526f659e5c7SJunchao Zhang   p = 0; /* Counter for connected leaf ranks */
527f659e5c7SJunchao Zhang   q = 0; /* Counter for connected roots */
528f659e5c7SJunchao Zhang   for (i=0; i<niranks; i++) {
529f659e5c7SJunchao Zhang     connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
530f659e5c7SJunchao Zhang     for (j=ioffset[i]; j<ioffset[i+1]; j++) {
531f659e5c7SJunchao Zhang       PetscInt loc;
532f659e5c7SJunchao Zhang       ierr = PetscFindInt(irootloc[j],nselected,selected,&loc);CHKERRQ(ierr);
533f659e5c7SJunchao Zhang       if (loc >= 0) { /* Found in selected this root is connected */
534f659e5c7SJunchao Zhang         bas->irootloc[q++] = irootloc[j];
535f659e5c7SJunchao Zhang         connected = PETSC_TRUE;
536f659e5c7SJunchao Zhang       }
537f659e5c7SJunchao Zhang     }
538f659e5c7SJunchao Zhang     if (connected) {
539f659e5c7SJunchao Zhang       bas->niranks++;
540f659e5c7SJunchao Zhang       if (i<ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */
541f659e5c7SJunchao Zhang       bas->iranks[p]    = iranks[i];
542f659e5c7SJunchao Zhang       bas->ioffset[p+1] = q;
543f659e5c7SJunchao Zhang       p++;
544f659e5c7SJunchao Zhang     }
545f659e5c7SJunchao Zhang   }
546f659e5c7SJunchao Zhang   bas->itotal = q;
547f659e5c7SJunchao Zhang 
548f659e5c7SJunchao Zhang   /* Setup packing optimizations */
549eb02082bSJunchao Zhang   ierr = PetscSFPackSetupOptimizations_Basic(esf);CHKERRQ(ierr);
550f659e5c7SJunchao Zhang   esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
551f659e5c7SJunchao Zhang 
552f659e5c7SJunchao Zhang   ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
553f659e5c7SJunchao Zhang   *newsf = esf;
554f659e5c7SJunchao Zhang   PetscFunctionReturn(0);
555f659e5c7SJunchao Zhang }
556f659e5c7SJunchao Zhang 
557f659e5c7SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedLeafSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
558f659e5c7SJunchao Zhang {
559f659e5c7SJunchao Zhang   PetscSF           esf;
560f659e5c7SJunchao Zhang   PetscInt          i,j,k,p,q,nroots,*rootdata,*leafdata,*new_ilocal,niranks,ndiranks,minleaf,maxleaf,maxlocal;
561b23bfdefSJunchao Zhang   const PetscInt    *ilocal,*ioffset,*irootloc,*buffer;
562f659e5c7SJunchao Zhang   const PetscMPIInt *iranks;
563eb02082bSJunchao Zhang   PetscSFPack       link;
564f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
565f659e5c7SJunchao Zhang   const PetscSFNode *iremote;
566f659e5c7SJunchao Zhang   PetscSF_Basic     *bas;
567f659e5c7SJunchao Zhang   MPI_Group         group;
568f659e5c7SJunchao Zhang   PetscErrorCode    ierr;
569f659e5c7SJunchao Zhang 
570f659e5c7SJunchao Zhang   PetscFunctionBegin;
571f659e5c7SJunchao Zhang   ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr);
572f659e5c7SJunchao Zhang   ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */
573f659e5c7SJunchao Zhang 
574f659e5c7SJunchao Zhang   /* Set the graph of esf, which is easy for CreateEmbeddedLeafSF */
575f659e5c7SJunchao Zhang   ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
576f659e5c7SJunchao Zhang   ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
577f659e5c7SJunchao Zhang   ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
578f659e5c7SJunchao Zhang   ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
579f659e5c7SJunchao Zhang   for (i=0; i<nselected; i++) {
580f659e5c7SJunchao Zhang     const PetscInt l     = selected[i];
581f659e5c7SJunchao Zhang     new_ilocal[i]        = ilocal ? ilocal[l] : l;
582f659e5c7SJunchao Zhang     new_iremote[i].rank  = iremote[l].rank;
583f659e5c7SJunchao Zhang     new_iremote[i].index = iremote[l].index;
584f659e5c7SJunchao Zhang   }
585f659e5c7SJunchao Zhang 
586f659e5c7SJunchao Zhang   /* Tag selected leaves before PetscSFSetGraph since new_ilocal might turn into NULL since we use PETSC_OWN_POINTER below */
587f659e5c7SJunchao Zhang   maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */
588f659e5c7SJunchao Zhang   ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);CHKERRQ(ierr);
589f659e5c7SJunchao Zhang   for (i=0; i<nselected; i++) leafdata[new_ilocal[i]-minleaf] = 1; /* -minleaf to adjust indices according to minleaf */
590f659e5c7SJunchao Zhang 
591f659e5c7SJunchao Zhang   ierr = PetscSFSetGraph(esf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
592f659e5c7SJunchao Zhang 
593f659e5c7SJunchao Zhang   /* Set up the outgoing communication (i.e., send info). We can not reuse rmine etc in sf since there is no way to
594f659e5c7SJunchao Zhang      map rmine[i] (ilocal of leaves) back to selected[j]  (leaf indices).
595f659e5c7SJunchao Zhang    */
596f659e5c7SJunchao Zhang   ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr);
597f659e5c7SJunchao Zhang   ierr = PetscSFSetUpRanks(esf,group);CHKERRQ(ierr);
598f659e5c7SJunchao Zhang   ierr = MPI_Group_free(&group);CHKERRQ(ierr);
599f659e5c7SJunchao Zhang 
600f659e5c7SJunchao Zhang   /* Set up the incoming communication (i.e., recv info) */
601f659e5c7SJunchao Zhang   ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr);
602f659e5c7SJunchao Zhang   bas  = (PetscSF_Basic*)esf->data;
603f659e5c7SJunchao Zhang   ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr);
604f659e5c7SJunchao Zhang   ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr);
605f659e5c7SJunchao Zhang 
606f659e5c7SJunchao Zhang   /* Pass info about selected leaves to root buffer */
607eb02082bSJunchao Zhang   ierr = PetscSFReduceBegin_Basic(sf,MPIU_INT,PETSC_MEMTYPE_HOST,leafdata-minleaf,PETSC_MEMTYPE_HOST,rootdata,MPIU_REPLACE);CHKERRQ(ierr); /* -minleaf to re-adjust start address of leafdata */
608eb02082bSJunchao Zhang   ierr = PetscSFPackGetInUse(sf,MPIU_INT,rootdata,leafdata-minleaf,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
609b7c0d12aSJunchao Zhang   ierr = PetscSFPackWaitall(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);
610f659e5c7SJunchao Zhang 
611f659e5c7SJunchao Zhang   bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
612f659e5c7SJunchao Zhang   p = 0; /* Counter for connected leaf ranks */
613f659e5c7SJunchao Zhang   q = 0; /* Counter for connected roots */
614f659e5c7SJunchao Zhang   for (i=0; i<niranks; i++) {
615f659e5c7SJunchao Zhang     PetscBool connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
616eb02082bSJunchao Zhang     buffer = i < ndiranks? (PetscInt*)link->selfbuf[PETSC_MEMTYPE_HOST] : (PetscInt*)link->rootbuf[PETSC_MEMTYPE_HOST] + (ioffset[i] - ioffset[ndiranks]);
617f659e5c7SJunchao Zhang     for (j=ioffset[i],k=0; j<ioffset[i+1]; j++,k++) {
618b23bfdefSJunchao Zhang       if (buffer[k]) {bas->irootloc[q++] = irootloc[j]; connected = PETSC_TRUE;}
619f659e5c7SJunchao Zhang     }
620f659e5c7SJunchao Zhang     if (connected) {
621f659e5c7SJunchao Zhang       bas->niranks++;
622f659e5c7SJunchao Zhang       if (i<ndiranks) bas->ndiranks++;
623f659e5c7SJunchao Zhang       bas->iranks[p]    = iranks[i];
624f659e5c7SJunchao Zhang       bas->ioffset[p+1] = q;
625f659e5c7SJunchao Zhang       p++;
626f659e5c7SJunchao Zhang     }
627f659e5c7SJunchao Zhang   }
628f659e5c7SJunchao Zhang   bas->itotal = q;
629eb02082bSJunchao Zhang   ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr);
630f659e5c7SJunchao Zhang 
631f659e5c7SJunchao Zhang   /* Setup packing optimizations */
632eb02082bSJunchao Zhang   ierr = PetscSFPackSetupOptimizations_Basic(esf);CHKERRQ(ierr);
633f659e5c7SJunchao Zhang   esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
634f659e5c7SJunchao Zhang 
635f659e5c7SJunchao Zhang   ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
636f659e5c7SJunchao Zhang   *newsf = esf;
637f659e5c7SJunchao Zhang   PetscFunctionReturn(0);
638f659e5c7SJunchao Zhang }
639f659e5c7SJunchao Zhang 
6408cc058d9SJed Brown PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
64195fce210SBarry Smith {
64240e23c03SJunchao Zhang   PetscSF_Basic  *dat;
64395fce210SBarry Smith   PetscErrorCode ierr;
64495fce210SBarry Smith 
64595fce210SBarry Smith   PetscFunctionBegin;
64695fce210SBarry Smith   sf->ops->SetUp                = PetscSFSetUp_Basic;
64795fce210SBarry Smith   sf->ops->SetFromOptions       = PetscSFSetFromOptions_Basic;
64895fce210SBarry Smith   sf->ops->Reset                = PetscSFReset_Basic;
64995fce210SBarry Smith   sf->ops->Destroy              = PetscSFDestroy_Basic;
65095fce210SBarry Smith   sf->ops->View                 = PetscSFView_Basic;
6513482bfa8SJunchao Zhang   sf->ops->BcastAndOpBegin      = PetscSFBcastAndOpBegin_Basic;
6523482bfa8SJunchao Zhang   sf->ops->BcastAndOpEnd        = PetscSFBcastAndOpEnd_Basic;
65395fce210SBarry Smith   sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
65495fce210SBarry Smith   sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
65595fce210SBarry Smith   sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
65695fce210SBarry Smith   sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
6578750ddebSJunchao Zhang   sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
658f659e5c7SJunchao Zhang   sf->ops->CreateEmbeddedSF     = PetscSFCreateEmbeddedSF_Basic;
659f659e5c7SJunchao Zhang   sf->ops->CreateEmbeddedLeafSF = PetscSFCreateEmbeddedLeafSF_Basic;
66095fce210SBarry Smith 
66140e23c03SJunchao Zhang   ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr);
66240e23c03SJunchao Zhang   sf->data = (void*)dat;
66395fce210SBarry Smith   PetscFunctionReturn(0);
66495fce210SBarry Smith }
665